{"nbformat":4,"nbformat_minor":0,"metadata":{"accelerator":"GPU","colab":{"name":"pytorch_autograd_and_nn.ipynb","provenance":[{"file_id":"1mGz6ZqYX6FDsVZYqEB36M-NRv7fl_ds4","timestamp":1604299481963}],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.1"},"toc":{"nav_menu":{},"number_sections":true,"sideBar":true,"skip_h1_title":false,"toc_cell":false,"toc_position":{},"toc_section_display":"block","toc_window_display":false},"varInspector":{"cols":{"lenName":16,"lenType":16,"lenVar":40},"kernels_config":{"python":{"delete_cmd_postfix":"","delete_cmd_prefix":"del ","library":"var_list.py","varRefreshCmd":"print(var_dic_list())"},"r":{"delete_cmd_postfix":") ","delete_cmd_prefix":"rm(","library":"var_list.r","varRefreshCmd":"cat(var_dic_list()) "}},"types_to_exclude":["module","function","builtin_function_or_method","instance","_Feature"],"window_display":false},"widgets":{"application/vnd.jupyter.widget-state+json":{"a42a185be73e4bd79c28fa03e6fb92f8":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_43cbcb200e8944d981f59c13f2555c86","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_678dcae2390e45b99dd9d749d32dcb1a","IPY_MODEL_bf2cef012c024f9ab445db3f176dec6c"]}},"43cbcb200e8944d981f59c13f2555c86":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"fl
ex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"678dcae2390e45b99dd9d749d32dcb1a":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_452868e936c843b29f4d5eb2a77753cd","_dom_classes":[],"description":"","_model_name":"FloatProgressModel","bar_style":"info","max":1,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":1,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_1d412ed887c94da1921d36fbeaff4bf6"}},"bf2cef012c024f9ab445db3f176dec6c":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_ebf56c93e11d44e29bd8bc5579153894","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 170500096/? 
[00:20&lt;00:00, 33215967.15it/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_885684ac4dd74557bebb83ef6958595e"}},"452868e936c843b29f4d5eb2a77753cd":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"1d412ed887c94da1921d36fbeaff4bf6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"ebf56c93e11d44e29bd8bc5579153894":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"885684
ac4dd74557bebb83ef6958595e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}}}}},"cells":[{"cell_type":"markdown","metadata":{"id":"DDJwQPZcupab"},"source":["# EECS 498-007/598-005 Assignment 4-1: Pytorch Autograd and NN\n","\n","Before we start, please put your name and UMID in the following format\n","\n",": Firstname LASTNAME, #00000000   //   e.g.) Justin JOHNSON, #12345678"]},{"cell_type":"markdown","metadata":{"id":"2KMxqLt1h2kx"},"source":["**Your Answer:**   \n","Hello WORLD, #XXXXXXXX"]},{"cell_type":"markdown","metadata":{"id":"aQW_w1Wzw72f","tags":["pdf-title"]},"source":["# torch.autograd and torch.nn\n","\n","So far, we used PyTorch to accelerate computation using the GPU.\n","PyTorch also provides several useful packages, which help to design deep neural networks efficiently.\n","\n","The `torch.autograd` package provides classes and functions implementing automatic differentiation of arbitrary scalar valued functions.\n","\n","This notebook assumes that you are using **PyTorch version 1.3**.\n","In some of the previous versions (e.g. 
before 0.4), Tensors had to be wrapped in `torch.autograd.Variable` objects to enable autograd;\n","however this class has now been deprecated and merged with `torch.Tensor`.\n","In addition, PyTorch 1.0 also separates a Tensor's datatype from its device, and uses numpy-style factories for constructing Tensors rather than directly invoking Tensor constructors.\n","Now, to obtain gradients for a tensor via autograd from arbitrary scalar valued functions, you can simply set `requires_grad=True`.\n","\n","The `torch.nn` package defines a set of Modules, each of which you can think of as a neural network layer that produces output from input and may have some trainable weights.\n","\n","You can also find the detailed [API doc](http://pytorch.org/docs/stable/index.html) here.\n","If you have other questions that are not addressed by the API docs, the [PyTorch forum](https://discuss.pytorch.org/) is a much better place to ask than StackOverflow.\n"]},{"cell_type":"markdown","metadata":{"id":"ED0jpoGyIL_B","tags":["pdf-ignore"]},"source":["# Table of Contents\n","\n","This assignment has 5 parts. You will learn PyTorch on **three different levels of abstraction**, which will help you understand it better.\n","\n","1. Part I, Preparation: As we always do, we will use the CIFAR-10 dataset.\n","2. Part II, Barebones PyTorch: **Abstraction level 1**, we will work directly with the lowest-level PyTorch Tensors with autograd.\n","3. Part III, PyTorch Module API: **Abstraction level 2**, we will use `nn.Module` to define an arbitrary neural network architecture. \n","4. Part IV, PyTorch Sequential API: **Abstraction level 3**, we will use `nn.Sequential` to define a fully-connected and convolutional network very conveniently. \n","5. 
Part V, Residual Network: please implement your own ResNet to get a high accuracy on CIFAR-10.\n","\n","Here is a table of comparison:\n","\n","| API             | Flexibility | Convenience |\n","|-----------------|-------------|-------------|\n","| Barebone        | High        | Low         |\n","| `nn.Module`     | High        | Medium      |\n","| `nn.Sequential` | Low         | High        |"]},{"cell_type":"markdown","metadata":{"id":"7LCmGZ_3IL_V"},"source":["# Part I. Preparation"]},{"cell_type":"markdown","metadata":{"id":"ubB_0e-UAOVK"},"source":["## Install starter code\n","We will continue using the utility functions that we've used for previous assignments: [`coutils` package](https://github.com/deepvision-class/starter-code). Run this cell to download and install it.\n"]},{"cell_type":"code","metadata":{"id":"ASkY27ZtA7Is","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606119635353,"user_tz":-480,"elapsed":14566,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}},"outputId":"cee609fe-a3bc-4400-95aa-19280b50a993"},"source":["!pip install git+https://github.com/deepvision-class/starter-code"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Collecting git+https://github.com/deepvision-class/starter-code\n","  Cloning https://github.com/deepvision-class/starter-code to /tmp/pip-req-build-fardbfjh\n","  Running command git clone -q https://github.com/deepvision-class/starter-code /tmp/pip-req-build-fardbfjh\n","Requirement already satisfied: pydrive in /usr/local/lib/python3.6/dist-packages (from Colab-Utils==0.1.dev0) (1.3.1)\n","Requirement already satisfied: PyYAML>=3.0 in /usr/local/lib/python3.6/dist-packages (from pydrive->Colab-Utils==0.1.dev0) (3.13)\n","Requirement already satisfied: oauth2client>=4.0.0 in /usr/local/lib/python3.6/dist-packages (from 
pydrive->Colab-Utils==0.1.dev0) (4.1.3)\n","Requirement already satisfied: google-api-python-client>=1.2 in /usr/local/lib/python3.6/dist-packages (from pydrive->Colab-Utils==0.1.dev0) (1.7.12)\n","Requirement already satisfied: rsa>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from oauth2client>=4.0.0->pydrive->Colab-Utils==0.1.dev0) (4.6)\n","Requirement already satisfied: pyasn1>=0.1.7 in /usr/local/lib/python3.6/dist-packages (from oauth2client>=4.0.0->pydrive->Colab-Utils==0.1.dev0) (0.4.8)\n","Requirement already satisfied: httplib2>=0.9.1 in /usr/local/lib/python3.6/dist-packages (from oauth2client>=4.0.0->pydrive->Colab-Utils==0.1.dev0) (0.17.4)\n","Requirement already satisfied: pyasn1-modules>=0.0.5 in /usr/local/lib/python3.6/dist-packages (from oauth2client>=4.0.0->pydrive->Colab-Utils==0.1.dev0) (0.2.8)\n","Requirement already satisfied: six>=1.6.1 in /usr/local/lib/python3.6/dist-packages (from oauth2client>=4.0.0->pydrive->Colab-Utils==0.1.dev0) (1.15.0)\n","Requirement already satisfied: uritemplate<4dev,>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from google-api-python-client>=1.2->pydrive->Colab-Utils==0.1.dev0) (3.0.1)\n","Requirement already satisfied: google-auth-httplib2>=0.0.3 in /usr/local/lib/python3.6/dist-packages (from google-api-python-client>=1.2->pydrive->Colab-Utils==0.1.dev0) (0.0.4)\n","Requirement already satisfied: google-auth>=1.4.1 in /usr/local/lib/python3.6/dist-packages (from google-api-python-client>=1.2->pydrive->Colab-Utils==0.1.dev0) (1.17.2)\n","Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.4.1->google-api-python-client>=1.2->pydrive->Colab-Utils==0.1.dev0) (4.1.1)\n","Requirement already satisfied: setuptools>=40.3.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.4.1->google-api-python-client>=1.2->pydrive->Colab-Utils==0.1.dev0) (50.3.2)\n","Building wheels for collected packages: Colab-Utils\n","  Building wheel for 
Colab-Utils (setup.py) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for Colab-Utils: filename=Colab_Utils-0.1.dev0-cp36-none-any.whl size=10324 sha256=207e24bd0f201e9e57ade7a57bfd64c2452d9cc149efe8f0cae85a2fca843048\n","  Stored in directory: /tmp/pip-ephem-wheel-cache-kz3hjjr9/wheels/63/d1/27/a208931527abb98d326d00209f46c80c9d745851d6a1defd10\n","Successfully built Colab-Utils\n","Installing collected packages: Colab-Utils\n","Successfully installed Colab-Utils-0.1.dev0\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"MzqbYcKdz6ew"},"source":["## Setup code\n","Run some setup code for this notebook."]},{"cell_type":"code","metadata":{"id":"Q8o3FxatIL_X","tags":["pdf-ignore"],"executionInfo":{"status":"ok","timestamp":1606119640971,"user_tz":-480,"elapsed":20178,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}}},"source":["import coutils\n","from coutils import fix_random_seed\n","\n","from collections import OrderedDict\n","import torch\n","import torch.nn as nn\n","import torch.nn.functional as F\n","import torch.optim as optim\n","from torch.utils.data import DataLoader\n","from torch.utils.data import sampler\n","\n","import torchvision.datasets as dset\n","import torchvision.transforms as T\n","\n","# for plotting\n","import matplotlib.pyplot as plt\n","%matplotlib inline\n","plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots\n","plt.rcParams['image.interpolation'] = 'nearest'\n","plt.rcParams['image.cmap'] = 
# Number of training-split images used for training; the remaining images of
# the training split are held out for validation.
NUM_TRAIN = 49000
# Minibatch size shared by the train / val / test loaders.
BATCH_SIZE = 64
# Directory the CIFAR-10 archive is downloaded to and read from.
DATA_ROOT = './datasets'

# The torchvision.transforms package provides tools for preprocessing data
# and for performing data augmentation; here we set up a transform to
# preprocess the data by subtracting the mean RGB value and dividing by the
# standard deviation of each RGB value; we've hardcoded the mean and std.
transform = T.Compose([
                T.ToTensor(),
                T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            ])

# We set up a Dataset object for each split (train / val / test); Datasets load
# training examples one at a time, so we wrap each Dataset in a DataLoader which
# iterates through the Dataset and forms minibatches. We divide the CIFAR-10
# training set into train and val sets by passing a Sampler object to the
# DataLoader telling how it should sample from the underlying Dataset.
cifar10_train = dset.CIFAR10(DATA_ROOT, train=True, download=True,
                             transform=transform)
loader_train = DataLoader(cifar10_train, batch_size=BATCH_SIZE,
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

# The validation split is the tail of the training split: indices
# NUM_TRAIN .. len(dataset) - 1 (the end is taken from the dataset itself
# instead of being hard-coded).
cifar10_val = dset.CIFAR10(DATA_ROOT, train=True, download=True,
                           transform=transform)
loader_val = DataLoader(cifar10_val, batch_size=BATCH_SIZE,
                        sampler=sampler.SubsetRandomSampler(
                            range(NUM_TRAIN, len(cifar10_val))))

cifar10_test = dset.CIFAR10(DATA_ROOT, train=False, download=True,
                            transform=transform)
loader_test = DataLoader(cifar10_test, batch_size=BATCH_SIZE)
# Global dtypes used throughout the notebook: float32 for data, int64 for labels.
dtype, ltype = torch.float, torch.long

# Use the first CUDA device when one is available; otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Constant to control how frequently we print train loss
print_every = 100

print('using device:', device)


def flatten(x, start_dim=1, end_dim=-1):
  """
  Collapse the dimensions start_dim..end_dim (inclusive) of x into one.

  Inputs:
  - x: A PyTorch Tensor
  - start_dim: First dimension to collapse (default 1, i.e. keep dimension 0)
  - end_dim: Last dimension to collapse (default -1, the final dimension)

  Returns:
  - The flattened Tensor
  """
  return torch.flatten(x, start_dim, end_dim)
After this exercise, you will come to appreciate the high-level model API more.\n","\n","We will start with a simple two-layer fully-connected ReLU network (one hidden layer, with biases) for CIFAR classification. \n","This implementation computes the forward pass using operations on PyTorch Tensors, and uses PyTorch autograd to compute gradients. It is important that you understand every line, because you will write a harder version after the example.\n","\n","When we create a PyTorch Tensor with `requires_grad=True`, then operations involving that Tensor will not just compute values; they will also build up a computational graph in the background, allowing us to easily backpropagate through the graph to compute gradients of a downstream loss with respect to some Tensors. Concretely, if `x` is a Tensor with `x.requires_grad == True` then after backpropagation `x.grad` will be another Tensor holding the gradient of the scalar loss at the end with respect to `x`."]},{"cell_type":"markdown","metadata":{"id":"3rPnppVkIMAB","tags":["pdf-ignore"]},"source":["### Barebones PyTorch: Two-Layer Network\n","\n","Here we define a function `two_layer_fc` which performs the forward pass of a two-layer fully-connected ReLU network on a batch of image data. 
def two_layer_fc(x, params):
  """
  Forward pass of a two-layer fully-connected network; the architecture is:
  fully connected (with bias) -> ReLU -> fully connected (with bias).
  Note that this function only defines the forward pass;
  PyTorch will take care of the backward pass for us.

  The input to the network will be a minibatch of data, of shape
  (N, d1, ..., dM) where d1 * ... * dM = D. The hidden layer will have H units,
  and the output layer will produce scores for C classes.

  Inputs:
  - x: A PyTorch Tensor of shape (N, d1, ..., dM) giving a minibatch of
    input data.
  - params: A list [w1, b1, w2, b2] of PyTorch Tensors giving the weights and
    biases of the network; w1 has shape (H, D), b1 has shape (H,),
    w2 has shape (C, H) and b2 has shape (C,).

  Returns:
  - scores: A PyTorch Tensor of shape (N, C) giving classification scores for
    the input data x.
  """
  # first we flatten the image
  x = flatten(x)  # shape: [batch_size, C x H x W]

  w1, b1, w2, b2 = params

  # Forward pass: compute predicted y using operations on Tensors. When the
  # parameters have requires_grad=True, operations involving these Tensors will
  # cause PyTorch to build a computational graph, allowing automatic
  # computation of gradients. Since we are no longer implementing the backward
  # pass by hand we don't need to keep references to intermediate values.
  # Note that F.linear(x, w, b) is equivalent to x.mm(w.t()) + b
  # For ReLU, you can also use `.clamp(min=0)`, equivalent to `F.relu()`
  x = F.relu(F.linear(x, w1, b1))
  x = F.linear(x, w2, b2)
  return x


def two_layer_fc_test():
  """Smoke test: run zeros through two_layer_fc and print the output size."""
  hidden_layer_size = 42
  x = torch.zeros((64, 3, 16, 16), dtype=dtype)  # minibatch size 64, feature dimension 3*16*16
  w1 = torch.zeros((hidden_layer_size, 3*16*16), dtype=dtype)
  b1 = torch.zeros((hidden_layer_size,), dtype=dtype)
  w2 = torch.zeros((10, hidden_layer_size), dtype=dtype)
  b2 = torch.zeros((10,), dtype=dtype)
  scores = two_layer_fc(x, [w1, b1, w2, b2])
  print('Output size:', list(scores.size()))  # you should see [64, 10]

two_layer_fc_test()
def three_layer_convnet(x, params):
  """
  Forward pass of a three-layer convolutional network:
  conv (zero-padding 2) -> ReLU -> conv (zero-padding 1) -> ReLU -> fully
  connected. No softmax is applied to the output.

  Inputs:
  - x: A PyTorch Tensor of shape (N, C, H, W) giving a minibatch of images
  - params: A list of six PyTorch Tensors, in this order:
    - conv_w1: shape (channel_1, C, KH1, KW1), weights of the first conv layer
    - conv_b1: shape (channel_1,), biases of the first conv layer
    - conv_w2: shape (channel_2, channel_1, KH2, KW2), weights of the second
      conv layer
    - conv_b2: shape (channel_2,), biases of the second conv layer
    - fc_w: weights of the fully-connected layer; its second dimension must
      equal the number of features per image left after the second conv layer
      is flattened
    - fc_b: biases of the fully-connected layer, one per output class

  Returns:
  - scores: PyTorch Tensor with one row per image and one column per class
    giving classification scores for x
  """
  conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b = params

  # First conv stage.
  hidden = F.conv2d(x, conv_w1, conv_b1, padding=2)
  hidden = F.relu(hidden)

  # Second conv stage.
  hidden = F.conv2d(hidden, conv_w2, conv_b2, padding=1)
  hidden = F.relu(hidden)

  # Collapse every non-batch dimension, then apply the linear classifier.
  return F.linear(flatten(hidden), fc_w, fc_b)
def three_layer_convnet_test():
  """
  Smoke test: run a batch of zeros through three_layer_convnet and print the
  output size. Every tensor is created with the global `dtype` so the test
  keeps working if `dtype` is changed from torch's default.
  """
  x = torch.zeros((64, 3, 32, 32), dtype=dtype)  # minibatch size 64, image size [3, 32, 32]

  conv_w1 = torch.zeros((6, 3, 5, 5), dtype=dtype)  # [out_channel, in_channel, kernel_H, kernel_W]
  conv_b1 = torch.zeros((6,), dtype=dtype)  # out_channel
  conv_w2 = torch.zeros((9, 6, 3, 3), dtype=dtype)  # [out_channel, in_channel, kernel_H, kernel_W]
  conv_b2 = torch.zeros((9,), dtype=dtype)  # out_channel

  # you must calculate the shape of the tensor after two conv layers, before the fully-connected layer
  # (here: 9 output channels at the 32 x 32 spatial size the weights below assume)
  fc_w = torch.zeros((10, 9 * 32 * 32), dtype=dtype)
  fc_b = torch.zeros(10, dtype=dtype)

  scores = three_layer_convnet(x, [conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b])
  print('Output size:', list(scores.size()))  # you should see [64, 10]
three_layer_convnet_test()
fix_random_seed(0)
# Weight vs. bias initialization:
# fill a fresh [3 x 5] tensor with Kaiming-normal values (as for a weight),
# then fill another fresh [3 x 5] tensor with zeros, printing each result.
for init_fn in (nn.init.kaiming_normal_, nn.init.zeros_):
  print(init_fn(torch.empty(3, 5, dtype=dtype, device=device)))
To prevent a graph from being built we scope our computation under a `torch.no_grad()` context manager."]},{"cell_type":"code","metadata":{"id":"l4xAUWASIMAq","tags":["pdf-ignore-input"],"executionInfo":{"status":"ok","timestamp":1606119674651,"user_tz":-480,"elapsed":53537,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}}},"source":["def check_accuracy_part2(loader, model_fn, params):\n","  \"\"\"\n","  Check the accuracy of a classification model.\n","  \n","  Inputs:\n","  - loader: A DataLoader for the data split we want to check\n","  - model_fn: A function that performs the forward pass of the model,\n","    with the signature scores = model_fn(x, params)\n","  - params: List of PyTorch Tensors giving parameters of the model\n","  \n","  Returns: Nothing, but prints the accuracy of the model\n","  (reported as 0.00% if the loader yields no samples)\n","  \"\"\"\n","  split = 'val' if loader.dataset.train else 'test'\n","  print('Checking accuracy on the %s set' % split)\n","  num_correct, num_samples = 0, 0\n","  with torch.no_grad():\n","    for x, y in loader:\n","      x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU\n","      y = y.to(device=device, dtype=ltype)\n","      scores = model_fn(x, params)\n","      _, preds = scores.max(1)  # index of the highest score in each row\n","      # .item() turns the 0-dim count tensor into a Python int, so the\n","      # running total stays a plain integer\n","      num_correct += (preds == y).sum().item()\n","      num_samples += preds.size(0)\n","    # an empty loader would otherwise raise ZeroDivisionError\n","    acc = float(num_correct) / num_samples if num_samples > 0 else 0.0\n","    print('Got %d / %d correct (%.2f%%)' % (num_correct, num_samples, 100 * acc))"],"execution_count":10,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"8LvPbM4WIMAv"},"source":["### BareBones PyTorch: Training Loop\n","We can now set up a basic training loop to train our network. We will train the model using stochastic gradient descent without momentum. 
We will use `torch.nn.functional.cross_entropy` to compute the loss; you can [read about it here](https://pytorch.org/docs/stable/nn.html#crossentropyloss).\n","\n","The training loop takes as input the neural network function, a list of initialized parameters (`[w1, w2]` in our example), and learning rate."]},{"cell_type":"code","metadata":{"id":"WYGBD0YZIMAx","tags":["pdf-ignore-input"],"executionInfo":{"status":"ok","timestamp":1606119674654,"user_tz":-480,"elapsed":53537,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}}},"source":["def train_part2(model_fn, params, learning_rate):\n","  \"\"\"\n","  Train a model on CIFAR-10.\n","  \n","  Inputs:\n","  - model_fn: A Python function that performs the forward pass of the model.\n","    It should have the signature scores = model_fn(x, params) where x is a\n","    PyTorch Tensor of image data, params is a list of PyTorch Tensors giving\n","    model weights, and scores is a PyTorch Tensor of shape (N, C) giving\n","    scores for the elements in x.\n","  - params: List of PyTorch Tensors giving weights for the model\n","  - learning_rate: Python scalar giving the learning rate to use for SGD\n","  \n","  Returns: Nothing\n","  \"\"\"\n","  for t, (x, y) in enumerate(loader_train):\n","    # Move the data to the proper device (GPU or CPU)\n","    x = x.to(device=device, dtype=dtype)\n","    y = y.to(device=device, dtype=ltype)\n","\n","    # Forward pass: compute scores and loss\n","    scores = model_fn(x, params)\n","    loss = F.cross_entropy(scores, y)\n","\n","    # Backward pass: PyTorch figures out which Tensors in the computational\n","    # graph has requires_grad=True and uses backpropagation to compute the\n","    # gradient of the loss with respect to these Tensors, and stores the\n","    # gradients in the .grad attribute of each Tensor.\n","    loss.backward()\n","\n","    # Update 
parameters. We don't want to backpropagate through the\n","    # parameter updates, so we scope the updates under a torch.no_grad()\n","    # context manager to prevent a computational graph from being built.\n","    with torch.no_grad():\n","      for w in params:\n","        if w.requires_grad:\n","          w -= learning_rate * w.grad\n","\n","          # Manually zero the gradients after running the backward pass\n","          w.grad.zero_()\n","\n","    if t % print_every == 0 or t == len(loader_train)-1:\n","      print('Iteration %d, loss = %.4f' % (t, loss.item()))\n","      check_accuracy_part2(loader_val, model_fn, params)\n","      print()"],"execution_count":11,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"gkTNXK9cIMA6"},"source":["### BareBones PyTorch: Train a Two-Layer Network\n","Now we are ready to run the training loop. We need to explicitly allocate tensors for the fully connected weights, `w1` and `w2`. \n","\n","Each minibatch of CIFAR has 64 examples, so the tensor shape is `[64, 3, 32, 32]`. \n","\n","After flattening, `x` shape should be `[64, 3 * 32 * 32]`. This will be the size of the second dimension of `w1`. \n","The first dimension of `w1` is the hidden layer size, which will also be the second dimension of `w2`. \n","\n","Finally, the output of the network is a 10-dimensional vector that represents the probability distribution over 10 classes. 
\n","\n","You don't need to tune any hyperparameters but you should see accuracies above 40% after training for one epoch."]},{"cell_type":"code","metadata":{"id":"OSBSy0JTIMA8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606119678843,"user_tz":-480,"elapsed":57707,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}},"outputId":"e2eed594-ce4a-4f3f-b216-e9fe3c5a2636"},"source":["fix_random_seed(0)\n","\n","C, H, W = 3, 32, 32\n","num_classes = 10\n","\n","hidden_layer_size = 4000\n","learning_rate = 1e-2\n","\n","w1 = nn.init.kaiming_normal_(torch.empty(hidden_layer_size, C*H*W, dtype=dtype, device=device))\n","w1.requires_grad = True\n","b1 = nn.init.zeros_(torch.empty(hidden_layer_size, dtype=dtype, device=device))\n","b1.requires_grad = True\n","w2 = nn.init.kaiming_normal_(torch.empty(num_classes, hidden_layer_size, dtype=dtype, device=device))\n","w2.requires_grad = True\n","b2 = nn.init.zeros_(torch.empty(num_classes, dtype=dtype, device=device))\n","b2.requires_grad = True\n","\n","train_part2(two_layer_fc, [w1, b1, w2, b2], learning_rate)"],"execution_count":12,"outputs":[{"output_type":"stream","text":["Iteration 0, loss = 3.5316\n","Checking accuracy on the val set\n","Got 119 / 1000 correct (11.90%)\n","\n","Iteration 100, loss = 2.9487\n","Checking accuracy on the val set\n","Got 359 / 1000 correct (35.90%)\n","\n","Iteration 200, loss = 2.1806\n","Checking accuracy on the val set\n","Got 396 / 1000 correct (39.60%)\n","\n","Iteration 300, loss = 1.8986\n","Checking accuracy on the val set\n","Got 407 / 1000 correct (40.70%)\n","\n","Iteration 400, loss = 1.5620\n","Checking accuracy on the val set\n","Got 420 / 1000 correct (42.00%)\n","\n","Iteration 500, loss = 2.1983\n","Checking accuracy on the val set\n","Got 428 / 1000 correct (42.80%)\n","\n","Iteration 600, loss = 1.8740\n","Checking 
accuracy on the val set\n","Got 421 / 1000 correct (42.10%)\n","\n","Iteration 700, loss = 1.7949\n","Checking accuracy on the val set\n","Got 436 / 1000 correct (43.60%)\n","\n","Iteration 765, loss = 1.5785\n","Checking accuracy on the val set\n","Got 402 / 1000 correct (40.20%)\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"7K_n_BZPIMBB"},"source":["### BareBones PyTorch: Training a ConvNet\n","\n","Below, you should use the functions defined above to train a three-layer convolutional network on CIFAR. The network should have the following architecture:\n","\n","1. Convolutional layer (with bias) with 32 5x5 filters, with zero-padding of 2\n","2. ReLU\n","3. Convolutional layer (with bias) with 16 3x3 filters, with zero-padding of 1\n","4. ReLU\n","5. Fully-connected layer (with bias) to compute scores for 10 classes\n","\n","You should initialize your weight matrices using `nn.init.kaiming_normal_` and your bias vectors using `nn.init.zeros_`, as demonstrated above.\n","\n","You don't need to tune any hyperparameters, but if everything works correctly you should achieve an accuracy above 45% after one epoch."]},{"cell_type":"code","metadata":{"id":"wBRWytEzIMBC","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606119700460,"user_tz":-480,"elapsed":79298,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}},"outputId":"2cc9f60d-9727-4d4b-ac55-a5618cf6d037"},"source":["fix_random_seed(0)\n","\n","C, H, W = 3, 32, 32\n","num_classes = 10\n","\n","channel_1 = 32\n","channel_2 = 16\n","kernel_size_1 = 5\n","kernel_size_2 = 3\n","\n","learning_rate = 3e-3\n","\n","conv_w1 = None\n","conv_b1 = None\n","conv_w2 = None\n","conv_b2 = None\n","fc_w = None\n","fc_b = 
None\n","\n","################################################################################\n","# TODO: Define and initialize the parameters of a three-layer ConvNet          #\n","#       using nn.init.kaiming_normal_.                                         #\n","################################################################################\n","# Replace \"pass\" statement with your code\n","conv_w1 = nn.init.kaiming_normal_(torch.empty(channel_1, C, kernel_size_1, kernel_size_1, dtype=dtype, device=device))\n","conv_w1.requires_grad = True\n","conv_b1 = nn.init.zeros_(torch.empty((channel_1,), dtype=dtype, device=device))\n","conv_b1.requires_grad = True\n","conv_w2 = nn.init.kaiming_normal_(torch.empty(channel_2, channel_1, kernel_size_2, kernel_size_2, dtype=dtype, device=device))\n","conv_w2.requires_grad = True\n","conv_b2 = nn.init.zeros_(torch.empty((channel_2,), dtype=dtype, device=device))\n","conv_b2.requires_grad = True\n","\n","fc_w = nn.init.kaiming_normal_(torch.empty(num_classes, channel_2*H*W, dtype=dtype, device=device))\n","fc_w.requires_grad = True\n","fc_b = nn.init.zeros_(torch.empty((num_classes,), dtype=dtype, device=device))\n","fc_b.requires_grad = True\n","################################################################################\n","#                                 END OF YOUR CODE                             #\n","################################################################################\n","\n","params = [conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b]\n","train_part2(three_layer_convnet, params, learning_rate)"],"execution_count":13,"outputs":[{"output_type":"stream","text":["Iteration 0, loss = 2.4780\n","Checking accuracy on the val set\n","Got 111 / 1000 correct (11.10%)\n","\n","Iteration 100, loss = 1.8854\n","Checking accuracy on the val set\n","Got 348 / 1000 correct (34.80%)\n","\n","Iteration 200, loss = 1.9930\n","Checking accuracy on the val set\n","Got 399 / 1000 correct (39.90%)\n","\n","Iteration 300, 
loss = 1.5761\n","Checking accuracy on the val set\n","Got 417 / 1000 correct (41.70%)\n","\n","Iteration 400, loss = 1.5247\n","Checking accuracy on the val set\n","Got 446 / 1000 correct (44.60%)\n","\n","Iteration 500, loss = 1.7096\n","Checking accuracy on the val set\n","Got 452 / 1000 correct (45.20%)\n","\n","Iteration 600, loss = 1.5374\n","Checking accuracy on the val set\n","Got 478 / 1000 correct (47.80%)\n","\n","Iteration 700, loss = 1.7430\n","Checking accuracy on the val set\n","Got 487 / 1000 correct (48.70%)\n","\n","Iteration 765, loss = 1.3109\n","Checking accuracy on the val set\n","Got 474 / 1000 correct (47.40%)\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"YXGdsYInIMBH"},"source":["# Part III. PyTorch Module API\n","\n","Barebone PyTorch requires that we track all the parameter tensors by hand. This is fine for small networks with a few tensors, but it would be extremely inconvenient and error-prone to track tens or hundreds of tensors in larger networks.\n","\n","PyTorch provides the `nn.Module` API for you to define arbitrary network architectures, while tracking every learnable parameters for you. In Part II, we implemented SGD ourselves. PyTorch also provides the `torch.optim` package that implements all the common optimizers, such as RMSProp, Adagrad, and Adam. It even supports approximate second-order methods like L-BFGS! You can refer to the [doc](http://pytorch.org/docs/master/optim.html) for the exact specifications of each optimizer.\n","\n","To use the Module API, follow the steps below:\n","\n","1. Subclass `nn.Module`. Give your network class an intuitive name like `TwoLayerFC`. \n","\n","2. In the constructor `__init__()`, define all the layers you need as class attributes. Layer objects like `nn.Linear` and `nn.Conv2d` are themselves `nn.Module` subclasses and contain learnable parameters, so that you don't have to instantiate the raw tensors yourself. 
`nn.Module` will track these internal parameters for you. Refer to the [doc](http://pytorch.org/docs/master/nn.html) to learn more about the dozens of builtin layers. **Warning**: don't forget to call the `super().__init__()` first!\n","\n","3. In the `forward()` method, define the *connectivity* of your network. You should use the attributes defined in `__init__` as function calls that take tensor as input and output the \"transformed\" tensor. Do *not* create any new layers with learnable parameters in `forward()`! All of them must be declared upfront in `__init__`. \n","\n","After you define your Module subclass, you can instantiate it as an object and call it just like the NN forward function in part II.\n","\n","### Module API: Two-Layer Network\n","Here is a concrete example of a 2-layer fully connected network.\n","We use `nn.init.kaiming_normal_` to initialize weights using Kaiming initialization, and `nn.init.zeros_` to initialize biases to zero."]},{"cell_type":"code","metadata":{"id":"2Ue0_Cf1IMBJ","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606119700461,"user_tz":-480,"elapsed":79268,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}},"outputId":"a8ecaebc-9af1-45b5-d9ae-01720e3c4ad1"},"source":["class TwoLayerFC(nn.Module):\n","  def __init__(self, input_size, hidden_size, num_classes):\n","    super().__init__()\n","    # assign layer objects to class attributes\n","    self.fc1 = nn.Linear(input_size, hidden_size)\n","    self.fc2 = nn.Linear(hidden_size, num_classes)\n","    # nn.init package contains convenient initialization methods\n","    # https://pytorch.org/docs/stable/nn.init.html#torch.nn.init.kaiming_normal_ \n","    nn.init.kaiming_normal_(self.fc1.weight)\n","    nn.init.kaiming_normal_(self.fc2.weight)\n","    nn.init.zeros_(self.fc1.bias)\n","    nn.init.zeros_(self.fc2.bias)\n","  
\n","  def forward(self, x):\n","    # forward always defines connectivity\n","    x = flatten(x)\n","    scores = self.fc2(F.relu(self.fc1(x)))\n","    return scores\n","\n","def test_TwoLayerFC():\n","  input_size = 3*16*16\n","  x = torch.zeros((64, input_size), dtype=dtype)  # minibatch size 64, feature dimension 3*16*16\n","  model = TwoLayerFC(input_size, 42, 10)\n","  scores = model(x)\n","  print('Architecture:')\n","  print(model) # printing `nn.Module` shows the architecture of the module.\n","  print('Output size:', list(scores.size()))  # you should see [64, 10]\n","test_TwoLayerFC()"],"execution_count":14,"outputs":[{"output_type":"stream","text":["Architecture:\n","TwoLayerFC(\n","  (fc1): Linear(in_features=768, out_features=42, bias=True)\n","  (fc2): Linear(in_features=42, out_features=10, bias=True)\n",")\n","Output size: [64, 10]\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"CruYc2HJIMBP"},"source":["### Module API: Three-Layer ConvNet\n","It's your turn to implement a 3-layer ConvNet followed by a fully connected layer. The network architecture should be the same as in Part II:\n","\n","1. Convolutional layer with `channel_1` 5x5 filters with zero-padding of 2\n","2. ReLU\n","3. Convolutional layer with `channel_2` 3x3 filters with zero-padding of 1\n","4. ReLU\n","5. 
Fully-connected layer to `num_classes` classes\n","\n","We assume that the size of the input of this network is `H = W = 32`, and there is no pooling; this information is required when computing the number of input features in the last fully-connected layer.\n","You should initialize the weight matrices of the model using Kaiming normal initialization, and zero out the bias vectors.\n","\n","**HINT**: [torch.nn.Conv2d](https://pytorch.org/docs/stable/nn.html#conv2d), [torch.nn.init.kaiming_normal_](https://pytorch.org/docs/stable/nn.init.html#torch.nn.init.kaiming_normal_), [torch.nn.init.zeros_](https://pytorch.org/docs/stable/nn.init.html#torch.nn.init.zeros_)\n","\n","After you implement the three-layer ConvNet, the `test_ThreeLayerConvNet` function will run your implementation; it should print `[64, 10]` for the shape of the output scores."]},{"cell_type":"code","metadata":{"id":"R58EqBTYIMBU","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606119700463,"user_tz":-480,"elapsed":79236,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}},"outputId":"5e826370-0491-44ff-edf7-3ba974ebb998"},"source":["class ThreeLayerConvNet(nn.Module):\n","  def __init__(self, in_channel, channel_1, channel_2, num_classes):\n","    super().__init__()\n","    ########################################################################\n","    # TODO: Set up the layers you need for a three-layer ConvNet with the  #\n","    # architecture defined above.                                          
#\n","    ########################################################################\n","    # Replace \"pass\" statement with your code\n","    self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=channel_1, kernel_size=5,padding=2)\n","    self.conv2 = nn.Conv2d(channel_1, channel_2, kernel_size=3, padding=1)\n","    self.fc = nn.Linear(channel_2*32*32, num_classes)\n","\n","    nn.init.kaiming_normal_(self.conv1.weight)\n","    nn.init.kaiming_normal_(self.conv2.weight)\n","    nn.init.kaiming_normal_(self.fc.weight)\n","    nn.init.zeros_(self.conv1.bias)\n","    nn.init.zeros_(self.conv2.bias)\n","    nn.init.zeros_(self.fc.bias)\n","    ########################################################################\n","    #                           END OF YOUR CODE                           #     \n","    ########################################################################\n","\n","  def forward(self, x):\n","    scores = None\n","    ########################################################################\n","    # TODO: Implement the forward function for a 3-layer ConvNet. 
you      #\n","    # should use the layers you defined in __init__ and specify the        #\n","    # connectivity of those layers in forward()                            #\n","    ########################################################################\n","    # Replace \"pass\" statement with your code\n","    # x = flatten(x, start_dim=2)\n","    x = F.relu(self.conv1(x))\n","    x = F.relu(self.conv2(x))\n","    scores = self.fc(flatten(x))\n","    ########################################################################\n","    #                            END OF YOUR CODE                          #\n","    ########################################################################\n","    return scores\n","\n","\n","def test_ThreeLayerConvNet():\n","  x = torch.zeros((64, 3, 32, 32), dtype=dtype)  # minibatch size 64, image size [3, 32, 32]\n","  model = ThreeLayerConvNet(in_channel=3, channel_1=12, channel_2=8, num_classes=10)\n","  scores = model(x)\n","  print(model) # printing `nn.Module` shows the architecture of the module.\n","  print('Output size:', list(scores.size()))  # you should see [64, 10]\n","test_ThreeLayerConvNet()"],"execution_count":15,"outputs":[{"output_type":"stream","text":["ThreeLayerConvNet(\n","  (conv1): Conv2d(3, 12, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n","  (conv2): Conv2d(12, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","  (fc): Linear(in_features=8192, out_features=10, bias=True)\n",")\n","Output size: [64, 10]\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"3efFpwV_IMBZ"},"source":["### Module API: Check Accuracy\n","Given the validation or test set, we can check the classification accuracy of a neural network. \n","\n","This version is slightly different from the one in part II. 
You don't manually pass in the parameters anymore."]},{"cell_type":"code","metadata":{"id":"LpgKJLVbIMBb","executionInfo":{"status":"ok","timestamp":1606119700464,"user_tz":-480,"elapsed":79234,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}}},"source":["def check_accuracy_part34(loader, model):\n","  \"\"\"\n","  Check the accuracy of a classification model built with the Module API.\n","  \n","  Inputs:\n","  - loader: A DataLoader for the data split we want to check\n","  - model: A PyTorch Module, called as scores = model(x)\n","  \n","  Returns: The fraction of correctly classified samples as a Python float\n","  (0.0 if the loader yields no samples). Also prints the accuracy.\n","  \"\"\"\n","  if loader.dataset.train:\n","    print('Checking accuracy on validation set')\n","  else:\n","    print('Checking accuracy on test set')\n","  num_correct = 0\n","  num_samples = 0\n","  model.eval()  # set model to evaluation mode\n","  with torch.no_grad():\n","    for x, y in loader:\n","      x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU\n","      y = y.to(device=device, dtype=ltype)\n","      scores = model(x)\n","      _, preds = scores.max(1)  # index of the highest score in each row\n","      # .item() turns the 0-dim count tensor into a Python int, so the\n","      # running total stays a plain integer\n","      num_correct += (preds == y).sum().item()\n","      num_samples += preds.size(0)\n","    # an empty loader would otherwise raise ZeroDivisionError\n","    acc = float(num_correct) / num_samples if num_samples > 0 else 0.0\n","    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))\n","  return acc"],"execution_count":16,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"-tmOMi8SIMBj"},"source":["### Module API: Training Loop\n","We also use a slightly different training loop. 
Rather than updating the values of the weights ourselves, we use an Optimizer object from the `torch.optim` package, which abstract the notion of an optimization algorithm and provides implementations of most of the algorithms commonly used to optimize neural networks."]},{"cell_type":"code","metadata":{"id":"HLJjvtu1IMBm","executionInfo":{"status":"ok","timestamp":1606119700467,"user_tz":-480,"elapsed":79235,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}}},"source":["def adjust_learning_rate(optimizer, lrd, epoch, schedule):\n","  \"\"\"\n","  Multiply lrd to the learning rate if epoch is in schedule\n","  \n","  Inputs:\n","  - optimizer: An Optimizer object we will use to train the model\n","  - lrd: learning rate decay; a factor multiplied at scheduled epochs\n","  - epoch: the current epoch number\n","  - schedule: the collection of epochs that require a learning rate update\n","  \n","  Returns: Nothing, but learning rate might be updated\n","  \"\"\"\n","  if epoch in schedule:\n","    for param_group in optimizer.param_groups:\n","      print('lr decay from {} to {}'.format(param_group['lr'], param_group['lr'] * lrd))\n","      param_group['lr'] *= lrd\n","\n","def train_part345(model, optimizer, epochs=1, learning_rate_decay=.1, schedule=(), verbose=True):\n","  \"\"\"\n","  Train a model on CIFAR-10 using the PyTorch Module API.\n","  \n","  Inputs:\n","  - model: A PyTorch Module giving the model to train.\n","  - optimizer: An Optimizer object we will use to train the model\n","  - epochs: (Optional) A Python integer giving the number of epochs to train for\n","  - learning_rate_decay: (Optional) factor passed to adjust_learning_rate as lrd\n","  - schedule: (Optional) epochs at which the learning rate is multiplied by\n","    learning_rate_decay\n","  - verbose: (Optional) if True, check accuracy whenever the global iteration\n","    is a multiple of print_every and after the very last iteration; if False,\n","    check accuracy once at the end of every epoch\n","  \n","  Returns: A tuple (acc_history, iter_history) of 1-D tensors with one entry\n","  per accuracy check, holding the accuracy returned by check_accuracy_part34\n","  and the global iteration at which it was measured. Also prints the loss and\n","  accuracy at every check.\n","  \"\"\"\n","  model = model.to(device=device)  # move the model parameters to CPU/GPU\n","  iters_per_epoch = len(loader_train)\n","  # Collect the checks in lists. Writing into a preallocated tensor at index\n","  # tt // print_every made the final check overwrite the previous entry (or\n","  # left a trailing zero) whenever the last iteration was not a multiple of\n","  # print_every.\n","  acc_history = []\n","  iter_history = []\n","  for e in range(epochs):\n","\n","    adjust_learning_rate(optimizer, learning_rate_decay, e, schedule)\n","\n","    for t, (x, y) in enumerate(loader_train):\n","      model.train()  # back to training mode; the accuracy check switches to eval\n","      x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU\n","      y = y.to(device=device, dtype=ltype)\n","\n","      scores = model(x)\n","      loss = F.cross_entropy(scores, y)\n","\n","      # Zero out all of the gradients for the variables which the optimizer\n","      # will update.\n","      optimizer.zero_grad()\n","\n","      # This is the backwards pass: compute the gradient of the loss with\n","      # respect to each  parameter of the model.\n","      loss.backward()\n","\n","      # Actually update the parameters of the model using the gradients\n","      # computed by the backwards pass.\n","      optimizer.step()\n","\n","      tt = t + e * iters_per_epoch  # global iteration index across epochs\n","      last_in_epoch = (t == iters_per_epoch - 1)\n","\n","      if verbose:\n","        do_check = tt % print_every == 0 or (e == epochs-1 and last_in_epoch)\n","      else:\n","        do_check = last_in_epoch\n","\n","      if do_check:\n","        print('Epoch %d, Iteration %d, loss = %.4f' % (e, tt, loss.item()))\n","        acc = check_accuracy_part34(loader_val, model)\n","        acc_history.append(acc)\n","        iter_history.append(tt)\n","        print()\n","  return (torch.tensor(acc_history, dtype=torch.float),\n","          torch.tensor(iter_history, dtype=torch.long))"],"execution_count":17,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"uFQs2WWKIMBu"},"source":["### Module API: Train a Two-Layer Network\n","Now we are ready to run the training loop. 
In contrast to part II, we don't explicitly allocate parameter tensors anymore.\n","\n","Simply pass the input size, hidden layer size, and number of classes (i.e. output size) to the constructor of `TwoLayerFC`. \n","\n","You also need to define an optimizer that tracks all the learnable parameters inside `TwoLayerFC`.\n","\n","You don't need to tune any hyperparameters, but you should see model accuracies above 40% after training for one epoch."]},{"cell_type":"code","metadata":{"id":"v4Od-a6_IMBv","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606119717231,"user_tz":-480,"elapsed":95976,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}},"outputId":"b97c362e-b26c-496d-adc2-ce7ae4e52601"},"source":["fix_random_seed(0)\n","\n","C, H, W = 3, 32, 32\n","num_classes = 10\n","\n","hidden_layer_size = 4000\n","learning_rate = 1e-2\n","weight_decay = 1e-4\n","\n","model = TwoLayerFC(C*H*W, hidden_layer_size, num_classes)\n","\n","optimizer = optim.SGD(model.parameters(), lr=learning_rate,\n","                      weight_decay=weight_decay)\n","\n","_ = train_part345(model, optimizer)"],"execution_count":18,"outputs":[{"output_type":"stream","text":["Epoch 0, Iteration 0, loss = 3.3988\n","Checking accuracy on validation set\n","Got 139 / 1000 correct (13.90)\n","\n","Epoch 0, Iteration 100, loss = 2.9728\n","Checking accuracy on validation set\n","Got 328 / 1000 correct (32.80)\n","\n","Epoch 0, Iteration 200, loss = 2.1079\n","Checking accuracy on validation set\n","Got 335 / 1000 correct (33.50)\n","\n","Epoch 0, Iteration 300, loss = 2.1675\n","Checking accuracy on validation set\n","Got 432 / 1000 correct (43.20)\n","\n","Epoch 0, Iteration 400, loss = 1.9809\n","Checking accuracy on validation set\n","Got 432 / 1000 correct (43.20)\n","\n","Epoch 0, Iteration 500, loss = 1.7846\n","Checking accuracy on 
validation set\n","Got 445 / 1000 correct (44.50)\n","\n","Epoch 0, Iteration 600, loss = 2.1037\n","Checking accuracy on validation set\n","Got 469 / 1000 correct (46.90)\n","\n","Epoch 0, Iteration 700, loss = 1.6775\n","Checking accuracy on validation set\n","Got 474 / 1000 correct (47.40)\n","\n","Epoch 0, Iteration 765, loss = 1.6805\n","Checking accuracy on validation set\n","Got 409 / 1000 correct (40.90)\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"J_35SZYHIMB4"},"source":["### Module API: Train a Three-Layer ConvNet\n","You should now use the Module API to train a three-layer ConvNet on CIFAR. This should look very similar to training the two-layer network! You don't need to tune any hyperparameters, but you should achieve an accuracy above 45% after training for one epoch.\n","\n","You should train the model using stochastic gradient descent without momentum, with L2 weight decay of 1e-4."]},{"cell_type":"code","metadata":{"id":"-bIRiwOJIMB6","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606119736818,"user_tz":-480,"elapsed":115546,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}},"outputId":"1d12fd84-e659-450d-8875-b27d2233218a"},"source":["fix_random_seed(0)\n","\n","C = 3\n","num_classes = 10\n","\n","channel_1 = 32\n","channel_2 = 16\n","\n","learning_rate = 3e-3\n","weight_decay = 1e-4\n","\n","model = None\n","optimizer = None\n","################################################################################\n","# TODO: Instantiate your ThreeLayerConvNet model and a corresponding optimizer.#\n","################################################################################\n","# Replace \"pass\" statement with your code\n","model = ThreeLayerConvNet(C, channel_1, channel_2, num_classes)\n","optimizer = optim.SGD(model.parameters(), lr=learning_rate,\n","                   
   weight_decay=weight_decay)\n","################################################################################\n","#                                 END OF YOUR CODE                             \n","################################################################################\n","\n","_ = train_part345(model, optimizer)"],"execution_count":19,"outputs":[{"output_type":"stream","text":["Epoch 0, Iteration 0, loss = 3.5309\n","Checking accuracy on validation set\n","Got 120 / 1000 correct (12.00)\n","\n","Epoch 0, Iteration 100, loss = 1.9049\n","Checking accuracy on validation set\n","Got 352 / 1000 correct (35.20)\n","\n","Epoch 0, Iteration 200, loss = 1.7809\n","Checking accuracy on validation set\n","Got 412 / 1000 correct (41.20)\n","\n","Epoch 0, Iteration 300, loss = 1.7487\n","Checking accuracy on validation set\n","Got 461 / 1000 correct (46.10)\n","\n","Epoch 0, Iteration 400, loss = 1.3962\n","Checking accuracy on validation set\n","Got 457 / 1000 correct (45.70)\n","\n","Epoch 0, Iteration 500, loss = 1.6607\n","Checking accuracy on validation set\n","Got 480 / 1000 correct (48.00)\n","\n","Epoch 0, Iteration 600, loss = 1.3305\n","Checking accuracy on validation set\n","Got 486 / 1000 correct (48.60)\n","\n","Epoch 0, Iteration 700, loss = 1.6026\n","Checking accuracy on validation set\n","Got 505 / 1000 correct (50.50)\n","\n","Epoch 0, Iteration 765, loss = 1.5541\n","Checking accuracy on validation set\n","Got 499 / 1000 correct (49.90)\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"qeTdi4-xIMB_"},"source":["# Part IV. PyTorch Sequential API\n","\n","Part III introduced the PyTorch Module API, which allows you to define arbitrary learnable layers and their connectivity. \n","\n","For simple models like a stack of feed forward layers, you still need to go through 3 steps: subclass `nn.Module`, assign layers to class attributes in `__init__`, and call each layer one by one in `forward()`. Is there a more convenient way? 
\n","\n","Fortunately, PyTorch provides a container Module called `nn.Sequential`, which merges the above steps into one. It is not as flexible as `nn.Module`, because you cannot specify more complex topology than a feed-forward stack, but it's good enough for many use cases.\n","\n","Before we start, We need to wrap `flatten` function in a module in order to stack it in `nn.Sequential`.\n","As of 1.3.0, PyTorch supports `nn.Flatten`, so this is not required in the latest version.\n","However, let's use the following `Flatten` class for backward compatibility for now."]},{"cell_type":"code","metadata":{"id":"eoBUeHDDeM1j","executionInfo":{"status":"ok","timestamp":1606119736819,"user_tz":-480,"elapsed":115543,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}}},"source":["class Flatten(nn.Module):\n","  def forward(self, x):\n","    return flatten(x)"],"execution_count":20,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"ZZq-3hjxeYr4"},"source":["### Sequential API: Two-Layer Network\n","Let's see how to rewrite our two-layer fully connected network example with `nn.Sequential`, and train it using the training loop defined above.\n","Here, let's skip weight initialization for simplicity;\n","with a more advanced optimizer than the naive SGD, the default initialization provided in `torch.nn` is good enough for shallow networks.\n","\n","Again, you don't need to tune any hyperparameters here, but you should achieve above 40% accuracy after one epoch of 
training."]},{"cell_type":"code","metadata":{"id":"9smkhciWIMCC","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606119757777,"user_tz":-480,"elapsed":136485,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}},"outputId":"cc0d39d2-86b4-4dd6-c998-b98f99393e93"},"source":["fix_random_seed(0)\n","\n","C, H, W = 3, 32, 32\n","num_classes = 10\n","\n","hidden_layer_size = 4000\n","learning_rate = 1e-2\n","momentum = 0.5\n","\n","# To give a specific name to each module, use OrderedDict.\n","model = nn.Sequential(OrderedDict([\n","  ('flatten', nn.Flatten()),\n","  ('fc1', nn.Linear(C*H*W, hidden_layer_size)),\n","  ('relu1', nn.ReLU()),\n","  ('fc2', nn.Linear(hidden_layer_size, num_classes)),\n","]))\n","\n","print('Architecture:')\n","print(model) # printing `nn.Module` shows the architecture of the module.\n","\n","# you can use Nesterov momentum in optim.SGD\n","optimizer = optim.SGD(model.parameters(), lr=learning_rate, \n","                      weight_decay=weight_decay,\n","                      momentum=momentum, nesterov=True)\n","\n","_ = train_part345(model, optimizer, epochs=1)"],"execution_count":21,"outputs":[{"output_type":"stream","text":["Architecture:\n","Sequential(\n","  (flatten): Flatten(start_dim=1, end_dim=-1)\n","  (fc1): Linear(in_features=3072, out_features=4000, bias=True)\n","  (relu1): ReLU()\n","  (fc2): Linear(in_features=4000, out_features=10, bias=True)\n",")\n","Epoch 0, Iteration 0, loss = 2.3772\n","Checking accuracy on validation set\n","Got 137 / 1000 correct (13.70)\n","\n","Epoch 0, Iteration 100, loss = 1.7475\n","Checking accuracy on validation set\n","Got 394 / 1000 correct (39.40)\n","\n","Epoch 0, Iteration 200, loss = 1.7002\n","Checking accuracy on validation set\n","Got 408 / 1000 correct (40.80)\n","\n","Epoch 0, Iteration 300, loss = 1.6130\n","Checking accuracy on 
validation set\n","Got 442 / 1000 correct (44.20)\n","\n","Epoch 0, Iteration 400, loss = 1.6352\n","Checking accuracy on validation set\n","Got 452 / 1000 correct (45.20)\n","\n","Epoch 0, Iteration 500, loss = 1.5462\n","Checking accuracy on validation set\n","Got 462 / 1000 correct (46.20)\n","\n","Epoch 0, Iteration 600, loss = 1.4803\n","Checking accuracy on validation set\n","Got 470 / 1000 correct (47.00)\n","\n","Epoch 0, Iteration 700, loss = 1.6634\n","Checking accuracy on validation set\n","Got 453 / 1000 correct (45.30)\n","\n","Epoch 0, Iteration 765, loss = 1.2921\n","Checking accuracy on validation set\n","Got 455 / 1000 correct (45.50)\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"5YMYfFUGIMCH"},"source":["### Sequential API: Three-Layer ConvNet\n","Here you should use `nn.Sequential` to define and train a three-layer ConvNet with the same architecture we used in Part III:\n","\n","1. Convolutional layer (with bias) with 32 5x5 filters, with zero-padding of 2\n","2. ReLU\n","3. Convolutional layer (with bias) with 16 3x3 filters, with zero-padding of 1\n","4. ReLU\n","5. 
Fully-connected layer (with bias) to compute scores for 10 classes\n","\n","You don't have to re-initialize your weight matrices and bias vectors.\n","\n","You should optimize your model using stochastic gradient descent with Nesterov momentum 0.5, with L2 weight decay of 1e-4.\n","\n","Again, you don't need to tune any hyperparameters but you should see accuracy above 50% after one epoch of training."]},{"cell_type":"code","metadata":{"id":"Q2To2-mtIMCJ","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606119777531,"user_tz":-480,"elapsed":156218,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}},"outputId":"74099d91-48f9-4aa5-8986-55c90668b0bd"},"source":["fix_random_seed(0)\n","\n","C, H, W = 3, 32, 32\n","num_classes = 10\n","\n","channel_1 = 32\n","channel_2 = 16\n","kernel_size_1 = 5\n","pad_size_1 = 2\n","kernel_size_2 = 3\n","pad_size_2 = 1\n","\n","learning_rate = 1e-2\n","momentum = 0.5\n","\n","model = None\n","optimizer = None\n","################################################################################\n","# TODO: Rewrite the 3-layer ConvNet with bias from Part III with the           #\n","# Sequential API.                                                              
#\n","################################################################################\n","# Replace \"pass\" statement with your code\n","model = nn.Sequential(OrderedDict([\n","      ('conv1', nn.Conv2d(C, channel_1, kernel_size=kernel_size_1, padding=pad_size_1)),\n","      ('relu1', nn.ReLU()),\n","      ('conv2', nn.Conv2d(channel_1, channel_2, kernel_size_2, padding=pad_size_2)),\n","      ('relu2', nn.ReLU()),\n","      ('flatten', nn.Flatten()), \n","      ('fc', nn.Linear(channel_2*H*W, num_classes)),\n","]))\n","# The exercise asks for Nesterov momentum; optim.SGD applies classical momentum\n","# unless nesterov=True is passed explicitly.\n","optimizer = optim.SGD(model.parameters(), lr=learning_rate,\n","                      momentum=momentum, nesterov=True, weight_decay=1e-4)\n","################################################################################\n","#                                 END OF YOUR CODE                             #\n","################################################################################\n","\n","print('Architecture:')\n","print(model) # printing `nn.Module` shows the architecture of the module.\n","\n","_ = train_part345(model, optimizer)"],"execution_count":22,"outputs":[{"output_type":"stream","text":["Architecture:\n","Sequential(\n","  (conv1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n","  (relu1): ReLU()\n","  (conv2): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","  (relu2): ReLU()\n","  (flatten): Flatten(start_dim=1, end_dim=-1)\n","  (fc): Linear(in_features=16384, out_features=10, bias=True)\n",")\n","Epoch 0, Iteration 0, loss = 2.2960\n","Checking accuracy on validation set\n","Got 133 / 1000 correct (13.30)\n","\n","Epoch 0, Iteration 100, loss = 1.6909\n","Checking accuracy on validation set\n","Got 400 / 1000 correct (40.00)\n","\n","Epoch 0, Iteration 200, loss = 1.4680\n","Checking accuracy on validation set\n","Got 480 / 1000 correct (48.00)\n","\n","Epoch 0, Iteration 300, loss = 1.7091\n","Checking accuracy on validation set\n","Got 481 / 1000 correct (48.10)\n","\n","Epoch 0, Iteration 400, loss = 
1.6187\n","Checking accuracy on validation set\n","Got 509 / 1000 correct (50.90)\n","\n","Epoch 0, Iteration 500, loss = 1.4000\n","Checking accuracy on validation set\n","Got 483 / 1000 correct (48.30)\n","\n","Epoch 0, Iteration 600, loss = 1.5005\n","Checking accuracy on validation set\n","Got 541 / 1000 correct (54.10)\n","\n","Epoch 0, Iteration 700, loss = 1.3263\n","Checking accuracy on validation set\n","Got 545 / 1000 correct (54.50)\n","\n","Epoch 0, Iteration 765, loss = 1.3596\n","Checking accuracy on validation set\n","Got 548 / 1000 correct (54.80)\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"CM6FuhGPIMCO"},"source":["# Part V. ResNet for CIFAR-10\n","\n","In this section, you are going to implement [ResNet](https://arxiv.org/abs/1512.03385), one of the state-of-the-art CNN architectures.\n","Specifically, you are going to implement a variation of ResNet called [PreResNet](https://arxiv.org/abs/1603.05027), which places the activation before each convolutional layer (so-called pre-activation).\n","You are going to first implement a plain building block, residual block, and then bottleneck block for really deep networks.\n","Finally, you will implement your own ResNet using those blocks.\n","\n","Throughout this part, we will follow the PyTorch default weight initialization for conciseness."]},{"cell_type":"markdown","metadata":{"id":"f8pgYlKN9NLH"},"source":["## Plain block\n","\n","First, let's implement a plain block without a residual connection.\n","PreResNet has a different order of layers from the previously implemented ones;\n","BatchNorm and ReLU precede Conv.\n","The name of PreResNet comes from this pre-activation architecture.\n","Here, for downsampling, we don't introduce MaxPool layers explicitly, but use stride 2 in the first Conv layer in the block.\n","\n","Concretely, a plain block accepts a feature map of shape $C_{in} \\times H_{in} \\times W_{in}$ and produces a feature map of shape $C_{out} \\times H_{out} 
\\times W_{out}$. If the block performs downsampling, then $W_{out}=W_{in}/2$ and $H_{out}=H_{in}/2$; otherwise $H_{out}=H_{in}$ and $W_{out}=W_{in}$. The plain block consists of the following six layers in order:\n","\n","1. Spatial Batch normalization\n","2. ReLU\n","3. Convolutional layer with `Cout` 3x3 filters, zero-padding of 1, and stride 2 if downsampling; otherwise stride 1\n","4. Spatial Batch normalization\n","5. ReLU\n","6. Convolutional layer with `Cout` 3x3 filters, with zero-padding of 1\n"]},{"cell_type":"code","metadata":{"id":"-c4QBBj5-A3R","executionInfo":{"status":"ok","timestamp":1606119777555,"user_tz":-480,"elapsed":156238,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}}},"source":["class PlainBlock(nn.Module):\n","  def __init__(self, Cin, Cout, downsample=False):\n","    super().__init__()\n","\n","    self.net = None\n","    ############################################################################\n","    # TODO: Implement plain block.                                             #\n","    # Hint: Wrap your layers by nn.Sequential() to output a single module.     #\n","    #       You don't have use OrderedDict.                                    #\n","    # Inputs:                                                                  #\n","    # - Cin: number of input channels                                          #\n","    # - Cout: number of output channels                                        #\n","    # - downsample: add downsampling (a conv with stride=2) if True            #\n","    # Store the result in self.net.                                            
#\n","    ############################################################################\n","    # Replace \"pass\" statement with your code\n","    s = 2 if downsample else 1\n","    self.net = nn.Sequential(OrderedDict([\n","      ('bn1', nn.BatchNorm2d(Cin)),\n","      ('relu1', nn.ReLU()),\n","      ('conv1', nn.Conv2d(Cin, Cout, kernel_size=3, padding=1, stride=s)),\n","      ('bn2', nn.BatchNorm2d(Cout)),\n","      ('relu2', nn.ReLU()),\n","      ('conv2', nn.Conv2d(Cout, Cout, kernel_size=3, padding=1)),\n","    ]))\n","    ############################################################################\n","    #                                 END OF YOUR CODE                         #\n","    ############################################################################\n","\n","  def forward(self, x):\n","    return self.net(x)"],"execution_count":23,"outputs":[]},{"cell_type":"code","metadata":{"id":"SofEF-vyAekS","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606119777558,"user_tz":-480,"elapsed":156219,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}},"outputId":"6ceaa1cf-8baa-41a8-c1ec-4455519f1dde"},"source":["data = torch.zeros(2, 3, 5, 6)\n","model = PlainBlock(3, 10)\n","if list(model(data).shape) == [2, 10, 5, 6]:\n","  print('The output of PlainBlock without downsampling has a *correct* dimension!')\n","else:\n","  print('The output of PlainBlock without downsampling has an *incorrect* dimension! expected:', [2, 10, 5, 6], 'got:', list(model(data).shape))\n","\n","data = torch.zeros(2, 3, 5, 6)\n","model = PlainBlock(3, 10, downsample=True)\n","if list(model(data).shape) == [2, 10, 3, 3]:\n","  print('The output of PlainBlock with downsampling has a *correct* dimension!')\n","else:\n","  print('The output of PlainBlock with downsampling has an *incorrect* dimension! 
expected:', [2, 10, 3, 3], 'got:', list(model(data).shape))"],"execution_count":24,"outputs":[{"output_type":"stream","text":["The output of PlainBlock without downsampling has a *correct* dimension!\n","The output of PlainBlock with downsampling has a *correct* dimension!\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"ASV5NdNADo3F"},"source":["## Residual block\n","\n","Next, let's implement a residual block.\n","A residual block adds a residual connection to a plain block. Let $\\mathcal{F}$ be a plain block; then the residual version  $\\mathcal{R}$ of the plain block $\\mathcal{F}$ computes:\n","\n","$\\mathcal{R}(x) = \\mathcal{F}(x) + x$\n","\n","However, this implementation will only work if the output from the plain block $\\mathcal{F}(x)$ has the same shape as the input $x$. Based on the plain block that we implemented above, there are two cases where the output of the plain block can have a different shape than the input:\n","\n","1. The number of output channels $C_{out}$ is different from the number of input channels $C_{in}$\n","2. The plain block $\\mathcal{F}$ performs spatial downsampling\n","\n","To deal with these cases, we need to generalize our definition of the residual block and add a *shortcut connection* $\\mathcal{G}$:\n","\n","$\\mathcal{R}(x) = \\mathcal{F}(x) + \\mathcal{G}(x)$\n","\n","There are three cases for the shortcut connection $\\mathcal{G}$:\n","\n","1. If $C_{in}=C_{out}$ and $\\mathcal{F}$ does not perform downsampling, then $\\mathcal{F}(x)$ will have the same shape as $x$, so $\\mathcal{G}$ is the identity function: $\\mathcal{G}(x) = x$\n","2. If $C_{in} \\neq C_{out}$ and $\\mathcal{F}$ does not downsample, then $\\mathcal{G}$ is a 1x1 convolution with $C_{out}$ filters and stride 1.\n","3. 
If $\\mathcal{F}$ downsamples, then $\\mathcal{G}$ is a 1x1 convolution with $C_{out}$ filters and stride 2.\n","\n","In the code below, implement a residual block using the plain block we just defined:"]},{"cell_type":"code","metadata":{"id":"jzqJCUx6Do3I","executionInfo":{"status":"ok","timestamp":1606120109032,"user_tz":-480,"elapsed":2352,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}}},"source":["class ResidualBlock(nn.Module):\n","  def __init__(self, Cin, Cout, downsample=False):\n","    super().__init__()\n","\n","    self.block = None # F\n","    self.shortcut = None # G\n","    ############################################################################\n","    # TODO: Implement residual block using plain block. Hint: nn.Identity()    #\n","    # Inputs:                                                                  #\n","    # - Cin: number of input channels                                          #\n","    # - Cout: number of output channels                                        #\n","    # - downsample: add downsampling (a conv with stride=2) if True            #\n","    # Store the main block in self.block and the shortcut in self.shortcut.    
#\n","    ############################################################################\n","    # Replace \"pass\" statement with your code\n","    self.block = PlainBlock(Cin, Cout, downsample)\n","\n","    stride = 2 if downsample else 1\n","    self.shortcut = nn.Sequential()\n","    if Cin != Cout or downsample:\n","      self.shortcut = nn.Sequential(nn.Conv2d(Cin, Cout, 1,\n","              stride=stride, padding=0))\n","\n","    ############################################################################\n","    #                                 END OF YOUR CODE                         #\n","    ############################################################################\n","  \n","  def forward(self, x):\n","    return self.block(x) + self.shortcut(x)"],"execution_count":27,"outputs":[]},{"cell_type":"code","metadata":{"id":"TMJ3-eI3Do3M","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606120114459,"user_tz":-480,"elapsed":2549,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}},"outputId":"20208501-08c3-49a2-fdbf-b29d78f70096"},"source":["data = torch.zeros(2, 3, 5, 6)\n","model = ResidualBlock(3, 10)\n","if list(model(data).shape) == [2, 10, 5, 6]:\n","  print('The output of ResidualBlock without downsampling has a *correct* dimension!')\n","else:\n","  print('The output of ResidualBlock without downsampling has an *incorrect* dimension! expected:', [2, 10, 5, 6], 'got:', list(model(data).shape))\n","\n","data = torch.zeros(2, 3, 5, 6)\n","model = ResidualBlock(3, 10, downsample=True)\n","if list(model(data).shape) == [2, 10, 3, 3]:\n","  print('The output of ResidualBlock with downsampling has a *correct* dimension!')\n","else:\n","  print('The output of ResidualBlock with downsampling has an *incorrect* dimension! 
expected:', [2, 10, 3, 3], 'got:', list(model(data).shape))"],"execution_count":28,"outputs":[{"output_type":"stream","text":["The output of ResidualBlock without downsampling has a *correct* dimension!\n","The output of ResidualBlock with downsampling has a *correct* dimension!\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"wrEzQeTBvNso"},"source":["## Residual stage\n","\n","So far, you have implemented micro layers, each of which consists of several convolutional layers.\n","To efficiently build a deep neural network, we define a macro layer by repeating the micro layers.\n","\n","For your convenience, we provide the implementation below."]},{"cell_type":"code","metadata":{"id":"NhgWE6qTvVW6","executionInfo":{"status":"ok","timestamp":1606120307483,"user_tz":-480,"elapsed":1372,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}}},"source":["class ResNetStage(nn.Module):\n","  def __init__(self, Cin, Cout, num_blocks, downsample=True,\n","               block=ResidualBlock):\n","    super().__init__()\n","    blocks = [block(Cin, Cout, downsample)]\n","    for _ in range(num_blocks - 1):\n","      blocks.append(block(Cout, Cout))\n","    self.net = nn.Sequential(*blocks)\n","  \n","  def forward(self, x):\n","    return self.net(x)"],"execution_count":29,"outputs":[]},{"cell_type":"code","metadata":{"id":"I21i5J3AnbhM","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606120311840,"user_tz":-480,"elapsed":1013,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}},"outputId":"1bde51f4-d519-4a63-8a80-8811d9934dda"},"source":["print('Plain block stage:')\n","print(ResNetStage(3, 4, 2, block=PlainBlock))\n","print('Residual block stage:')\n","print(ResNetStage(3, 4, 2, 
block=ResidualBlock))"],"execution_count":30,"outputs":[{"output_type":"stream","text":["Plain block stage:\n","ResNetStage(\n","  (net): Sequential(\n","    (0): PlainBlock(\n","      (net): Sequential(\n","        (bn1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","        (relu1): ReLU()\n","        (conv1): Conv2d(3, 4, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n","        (bn2): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","        (relu2): ReLU()\n","        (conv2): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","      )\n","    )\n","    (1): PlainBlock(\n","      (net): Sequential(\n","        (bn1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","        (relu1): ReLU()\n","        (conv1): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","        (bn2): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","        (relu2): ReLU()\n","        (conv2): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","      )\n","    )\n","  )\n",")\n","Residual block stage:\n","ResNetStage(\n","  (net): Sequential(\n","    (0): ResidualBlock(\n","      (block): PlainBlock(\n","        (net): Sequential(\n","          (bn1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","          (relu1): ReLU()\n","          (conv1): Conv2d(3, 4, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n","          (bn2): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","          (relu2): ReLU()\n","          (conv2): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","        )\n","      )\n","      (shortcut): Sequential(\n","        (0): Conv2d(3, 4, kernel_size=(1, 1), stride=(2, 2))\n","      )\n","    )\n","    (1): ResidualBlock(\n","      (block): PlainBlock(\n","        (net): Sequential(\n","       
   (bn1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","          (relu1): ReLU()\n","          (conv1): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","          (bn2): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","          (relu2): ReLU()\n","          (conv2): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","        )\n","      )\n","      (shortcut): Sequential()\n","    )\n","  )\n",")\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"7l3-PNR9mlIb"},"source":["## Residual stem\n","\n","A \"stem\" layer is required at the beginning of the network, which increases the number of channels while keeping the other dimensions.\n","\n","For your convenience, we provide the implementation below."]},{"cell_type":"code","metadata":{"id":"eUvbubymmlIc","executionInfo":{"status":"ok","timestamp":1606120524312,"user_tz":-480,"elapsed":1357,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}}},"source":["class ResNetStem(nn.Module):\n","  def __init__(self, Cin=3, Cout=8):\n","    super().__init__()\n","    layers = [\n","        nn.Conv2d(Cin, Cout, kernel_size=3, padding=1, stride=1),\n","        nn.ReLU(),\n","    ]\n","    self.net = nn.Sequential(*layers)\n","    \n","  def forward(self, x):\n","    return self.net(x)"],"execution_count":31,"outputs":[]},{"cell_type":"code","metadata":{"id":"UGzh0oVxm2Aw","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606120525493,"user_tz":-480,"elapsed":691,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}},"outputId":"92d84fa8-f429-4668-9967-5709df23ad2b"},"source":["data = torch.zeros(2, 3, 5, 6)\n","model = ResNetStem(3, 10)\n","if 
list(model(data).shape) == [2, 10, 5, 6]:\n","  print('The output of ResidualBlock without downsampling has a *correct* dimension!')\n","else:\n","  print('The output of ResidualBlock without downsampling has an *incorrect* dimension! expected:', [2, 10, 5, 6], 'got:', list(model(data).shape))"],"execution_count":32,"outputs":[{"output_type":"stream","text":["The output of ResidualBlock without downsampling has a *correct* dimension!\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"QGzn4Gp_sYBp"},"source":["## ResNet class\n","\n","Now, it is time to design the ResNet class using the blocks you implemented above!\n","\n","For general applicability, the class will get a dictionary of the architecture specification as an input, and parse it to build a CNN.\n","\n","Here we provide a couple of examples of specification;\n","`networks` is a collection of pre-defined network specifications, where each can be called by `get_resnet(key)`, where key is the name of the network, e.g., `get_resnet('resnet32')` will return a ResNet with 32 layers.\n","\n","Each specification consists of multiple tuples which correspond to a macro block (`ResNetStage`), and the values in each tuple implies `(num_in_channels, num_out_channels, num_blocks, do_downsample)`.\n","\n","To avoid dependency on the size of the input, ResNet has an average pooling at the end of the convolutional part, such that the size of the input tensor to the linear layer is always `(batch_size, stage_args[-1][1])`.\n","You may want to add an average pooling layer (`nn.AvgPool2d`), but it requires to know the size of the input.\n","Can you relax this requirement?\n","\n","**Hint**: You can perform average pooling in 
`forward`."]},{"cell_type":"code","metadata":{"id":"1iOOBoSgs-0X","executionInfo":{"status":"ok","timestamp":1606121734617,"user_tz":-480,"elapsed":1110,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}}},"source":["# example of specifications\n","networks = {\n","  'plain32': {\n","    'block': PlainBlock,\n","    'stage_args': [\n","      (8, 8, 5, False),\n","      (8, 16, 5, True),\n","      (16, 32, 5, True),\n","    ]\n","  },\n","  'resnet32': {\n","    'block': ResidualBlock,\n","    'stage_args': [\n","      (8, 8, 5, False),\n","      (8, 16, 5, True),\n","      (16, 32, 5, True),\n","    ]\n","  },\n","}"],"execution_count":46,"outputs":[]},{"cell_type":"code","metadata":{"id":"nVSeEzB7scmW","executionInfo":{"status":"ok","timestamp":1606122415248,"user_tz":-480,"elapsed":2611,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}}},"source":["class ResNet(nn.Module):\n","  def __init__(self, stage_args, Cin=3, block=ResidualBlock, num_classes=10):\n","    super().__init__()\n","\n","    self.cnn = None\n","    ############################################################################\n","    # TODO: Implement the convolutional part of ResNet using ResNetStem,       #\n","    #       ResNetStage, and wrap the modules by nn.Sequential.                #\n","    # Store the model in self.cnn.                                             
#\n","    ############################################################################\n","    # Replace \"pass\" statement with your code\n","    # The network opens with the stem, which lifts the Cin input channels to\n","    # the channel count expected by the first stage (stage_args[0][0]).\n","    stages = [ResNetStem(Cin, stage_args[0][0])]\n","    # Each spec tuple is (Cin, Cout, num_blocks, downsample) for one ResNetStage.\n","    for args in stage_args:\n","      stages.append(ResNetStage(*args, block=block))\n","    self.cnn = nn.Sequential(*stages)\n","    ############################################################################\n","    #                                 END OF YOUR CODE                         #\n","    ############################################################################\n","    self.fc = nn.Linear(stage_args[-1][1], num_classes)\n","  \n","  def forward(self, x):\n","    \"\"\"Map an image batch x to class scores of shape (batch, num_classes).\"\"\"\n","    scores = None\n","    ############################################################################\n","    # TODO: Implement the forward function of ResNet.                          #\n","    # Store the output in `scores`.                                            #\n","    ############################################################################\n","    # Replace \"pass\" statement with your code\n","    features = self.cnn(x)\n","    # Global average pooling: adaptive pooling to 1x1 works for any spatial\n","    # size (including non-square maps), so fc always receives\n","    # stage_args[-1][1] features per sample.\n","    pooled = F.adaptive_avg_pool2d(features, 1)\n","    scores = self.fc(torch.flatten(pooled, 1))\n","    ############################################################################\n","    #                                 END OF YOUR CODE                         #\n","    ############################################################################\n","    return scores\n","\n","def get_resnet(name):\n","  return ResNet(**networks[name])\n","\n"],"execution_count":52,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"uqhp641H0P9Z"},"source":["## Train your model!\n","\n","Now let's train a few epochs of plain and residual networks with 32 layers on CIFAR.\n","You will see that deep non-residual networks don't converge well.\n","\n","**Caution: This takes a long 
time!**\n","\n","**Disclaimer: The performance of PreResNet-32 you will see here (~ 80%) would be lower than the best performance this model can achieve, because the convergence requires much more training.**"]},{"cell_type":"code","metadata":{"id":"hRyYVBn60A58","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606123541517,"user_tz":-480,"elapsed":1121947,"user":{"displayName":"young","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjuSwIMBj1x2bJOgTT5-3wA8Yt5zgRoz7viNvKn=s64","userId":"11870005096077095950"}},"outputId":"3cf400b7-bbd4-478d-dc40-3613fc699fc8"},"source":["# def init_module(model):\n","#   for m in model.modules():\n","#     if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):\n","#       nn.init.kaiming_normal_(m.weight.data)\n","#       if m.bias is not None: nn.init.zeros_(m.bias.data)\n","#     elif isinstance(m, nn.BatchNorm2d):\n","#       nn.init.ones_(m.weight.data)\n","#       if m.bias is not None: nn.init.zeros_(m.bias.data)\n","\n","names = ['plain32', 'resnet32']\n","acc_history_dict = {}\n","iter_history_dict = {}\n","for name in names:\n","  fix_random_seed(0)\n","  print(name, '\\n')\n","  model = get_resnet(name)\n","#   init_module(model)\n","  \n","  optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=.9, weight_decay=1e-4)\n","\n","  acc_history, iter_history = train_part345(model, optimizer, epochs=10, schedule=[6, 8], verbose=False)\n","  acc_history_dict[name] = acc_history\n","  iter_history_dict[name] = iter_history"],"execution_count":53,"outputs":[{"output_type":"stream","text":["plain32 \n","\n","Epoch 0, Iteration 765, loss = 2.0176\n","Checking accuracy on validation set\n","Got 321 / 1000 correct (32.10)\n","\n","Epoch 1, Iteration 1531, loss = 1.7401\n","Checking accuracy on validation set\n","Got 426 / 1000 correct (42.60)\n","\n","Epoch 2, Iteration 2297, loss = 1.1602\n","Checking accuracy on validation set\n","Got 379 / 1000 correct (37.90)\n","\n","Epoch 
3, Iteration 3063, loss = 1.2892\n","Checking accuracy on validation set\n","Got 535 / 1000 correct (53.50)\n","\n","Epoch 4, Iteration 3829, loss = 1.1772\n","Checking accuracy on validation set\n","Got 556 / 1000 correct (55.60)\n","\n","Epoch 5, Iteration 4595, loss = 0.9069\n","Checking accuracy on validation set\n","Got 569 / 1000 correct (56.90)\n","\n","lr decay from 0.01 to 0.001\n","Epoch 6, Iteration 5361, loss = 0.8141\n","Checking accuracy on validation set\n","Got 678 / 1000 correct (67.80)\n","\n","Epoch 7, Iteration 6127, loss = 1.1717\n","Checking accuracy on validation set\n","Got 671 / 1000 correct (67.10)\n","\n","lr decay from 0.001 to 0.0001\n","Epoch 8, Iteration 6893, loss = 0.9146\n","Checking accuracy on validation set\n","Got 685 / 1000 correct (68.50)\n","\n","Epoch 9, Iteration 7659, loss = 1.0471\n","Checking accuracy on validation set\n","Got 694 / 1000 correct (69.40)\n","\n","resnet32 \n","\n","Epoch 0, Iteration 765, loss = 0.8889\n","Checking accuracy on validation set\n","Got 499 / 1000 correct (49.90)\n","\n","Epoch 1, Iteration 1531, loss = 1.0764\n","Checking accuracy on validation set\n","Got 606 / 1000 correct (60.60)\n","\n","Epoch 2, Iteration 2297, loss = 0.9305\n","Checking accuracy on validation set\n","Got 692 / 1000 correct (69.20)\n","\n","Epoch 3, Iteration 3063, loss = 0.7663\n","Checking accuracy on validation set\n","Got 715 / 1000 correct (71.50)\n","\n","Epoch 4, Iteration 3829, loss = 0.5873\n","Checking accuracy on validation set\n","Got 721 / 1000 correct (72.10)\n","\n","Epoch 5, Iteration 4595, loss = 0.7319\n","Checking accuracy on validation set\n","Got 699 / 1000 correct (69.90)\n","\n","lr decay from 0.01 to 0.001\n","Epoch 6, Iteration 5361, loss = 0.5146\n","Checking accuracy on validation set\n","Got 807 / 1000 correct (80.70)\n","\n","Epoch 7, Iteration 6127, loss = 0.3172\n","Checking accuracy on validation set\n","Got 818 / 1000 correct (81.80)\n","\n","lr decay from 0.001 to 0.0001\n","Epoch 8, 
# Overlay the validation-accuracy curve of every trained network on one
# figure, using the current pyplot figure/axes explicitly.
fig, ax = plt.gcf(), plt.gca()

ax.set_title('Val accuracies')
for name in names:
  # x: iteration at which accuracy was measured, y: validation accuracy.
  ax.plot(iter_history_dict[name], acc_history_dict[name], '-o')

# Legend entries follow the plotting order, i.e. the order of `names`.
ax.legend(names, loc='upper left')
ax.set_xlabel('iterations')
ax.set_ylabel('accuracy')

fig.set_size_inches(9, 4)
plt.show()
eL/QT6nNTp1z7znnfnCez4APqWgRuh540Tm3wDlX6pxLd86tMbO2wNnAzc65Pc65It97a+tt59xXvn3mO+f+45xb7lteBkyvVP/lwEfOuem+z9nlnDsg1ADjgH87575xzpU456YABcBwvNAXA/Q2syjn3Ebn3A+HUa+I+JFCjUiYcc59CewELjSzY4FhwDQAM+thZu/6Og1nA3/Ba7U5JDM728z+Z2a7zSwT+Fml93YEqvvl3xHY7Zzbc4SHs7lKDceb2admtsPMsoCba1FDVZ2B3/guPWX6jqUj0M45tx64C/gjkGFmr5tZuyOsXUTqmEKNSHiaitdCcyUw3zm33bf+WWAN0N051xT4f0DVTsUHMLMY4C3gcSDZOZcEzKv03s3AsdW8dTPQ3MySqnktF++yVNlntKlmG1dleRowF+jonEsEnqtFDdXV9LBzLqnSo7FzbjqAc26ac+4kvPDjgL/WYp8iUg8UakTC01TgdOBGfJeefBKAbCDHzI4Dbqnl/qLxLsvsAIrN7GzgzEqvvwD80sxGmVmEmbU3s+Occ1vx+rk8Y2bNzCzKzE72vec7oI+ZDfRdKvtjLepIwGv5yff147m80muvAaeb2aVm1sjMWpjZwGr28Txws6/Vx8ysia8DcoKZ9TSzkb4Qlw/kAaW1PEci4mcKNSJhyNdf5mugCV7LRpnf4gWBvXi/3GfUcn97gTvwOiHv8e1jbqXXv8XXeRjIAj7Da+kAbzh5EV4LUQbe5R2cc98DDwIfAevwOgIfyq+AB81sL3C/r56yGjbhXRL7DbAbr5PwgGqOJRUv7D3tO5b1wLW+l2OAR/Eu323D69g8oRZ1iUg9MOeqtt6KiIiIBB+11IiIiEhIUKgRERGRkKBQIyIiIiFBoUZERERCQtDNyNuyZUvXpUuXQJchIiIidWDRokU7nXOt6mJfQRdqunTpQmpqaqDLEBERkTpgZj/V1b50+UlERERCgkKNiIiIhASFGhEREQkJQdenpjpFRUWkpaWRn58f6FKCTmxsLB06dCAqKirQpYiIiByVkAg1aWlpJCQk0KVLF8wOOaGw+Djn2LVrF2lpaXTt2jXQ5YiIiByVkLj8lJ+fT4sWLRRoDpOZ0aJFC7VwiUjDsGwmPNkX/pjkfV0289DvEf8Ksn+TkGipARRojpDOm4g0CMtmwjt3QFGet5y12VsG6H9p4Oo6UstmwscPQlYaJHaAUfcH33EE4b9JyIQaEREJMoX7IHcH5O6EDyZU/PIsU5QH7/3W+2VqEQd52CFer802dbEP3zZr3/cCTbGvFTxrM8y9HXK2Q4+zwZUe5OEO8Xo9bvPVU9X/m3z8oEJNuDr11FN5/PHHSUlJqXGbG264gbvvvpvevXvXuM1zzz3HxIkTiYyMJD4+nkmTJtG7d28WLFjAvffeS2FhIdHR0Tz22GOMHDnSH4ciInJwJcWQt9sXVHxhpfx5peWcDO95Ue6h91mQ5f0SDXbF+fDhfd4j2GWlBbqCGoVlqJmzJJ3H5q9lS2Ye7ZLiGD+6JxcOah+weiZPnnzIbS6//HJuvvlmAObOncvdd9/NBx98QMuWLXnnnXdo164dK1asYPTo0aSnp/u7ZBEJB85Bwd5qAkrVsOJ77NsNuAP3Y5HQpJXv0RI6dvWex7eqWD/3Ni/sVJXYAW5f3PBbNSq/Pvumms/pmMl12yrkz338a1D1ASaxwxH/l/K3sAs1c5akM2HWcvKKSgBIz8xjwqzlAEcVbDZu3MhZZ53FkCFDWLx4MX369GHq1Kn7bXPLLbewcOFC8vLyuOSSS/jTn/4E7N+aEx8fz5133sm7775LXFwcb7/9NsnJyTRt2rR8P7m5ueV9YQYNGlS+vk+fPuTl5VFQUEBMTMwRH4uIBJHD7btRXAj7aggoOdW0qpQUVL+f2MSKQNKyO3Q+sSK0NGm1f4iJTYKIQ4xLOfPh/ftvAETFwagHoFGQ/Tz75M/eJaeqEjtC/5/Xfz1HatQDNfyb
3B+4mg4h5ELNn95Zyaot2TW+vmRTJoUlpfutyysq4XdvLmP6t5uqfU/vdk154Lw+h/zstWvX8sILL3DiiSdy3XXX8cwzz+z3+sMPP0zz5s0pKSlh1KhRLFu2jP79+++3TW5uLsOHD+fhhx/md7/7Hc8//zz33ec1V06cOJEnnniCwsJCPvnkkwM+/6233mLw4MEKNCLhorqOnG/fCj98AkmdITfjwOCSn1X9viJjKkJIfGtI7lMloFR63rglNIqu22MpC2LB3rkWvLqDLAxUKwj/TfwaaszsLOCfQCQw2Tn3aJXXOwFTgCTfNvc65+b5s6aqgeZQ6w9Hx44dOfHEEwG48soreeqpp/Z7febMmUyaNIni4mK2bt3KqlWrDgg10dHRnHvuuQAMGTKEBQsWlL926623cuuttzJt2jT+/Oc/M2XKlPLXVq5cyT333MOHH3541MchIkHi4wcP7MhZUgjfTQcMGjevCCJt+lcfUMqexyR4lyICqf+lDfoXZq0FYRioUZD9m/gt1JhZJDAROANIAxaa2Vzn3KpKm90HzHTOPWtmvYF5QJej+dxDtaic+OgnpGfmHbC+fVIcM24acTQffcDw6MrLGzZs4PHHH2fhwoU0a9aMa6+9ttr7w0RFRZW/LzIykuLi4gO2GTt2LLfcckv5clpaGhdddBFTp07l2GOPPapjEJEgUmOHTYM/7ITIkGuMDx5BFgZChT9vvjcMWO+c+9E5Vwi8DlxQZRsHlHUWSQS2+LEeAMaP7klcVOR+6+KiIhk/uudR73vTpk3897//BWDatGmcdNJJ5a9lZ2fTpEkTEhMT2b59O++///5h7XvdunXlz9977z26d+8OQGZmJueccw6PPvpoeSuRiISBtNSaW1YSOyjQSFjyZ6hpD1TuKZXmW1fZH4ErzSwNr5Xm9up2ZGbjzCzVzFJ37NhxVEVdOKg9j4zpR/ukOAyvheaRMf3qZPRTz549mThxIr169WLPnj37taYMGDCAQYMGcdxxx3H55ZcfdgB5+umn6dOnDwMHDuSJJ54ov/T09NNPs379eh588EEGDhzIwIEDycioZgSBiISOFbPg5XMgrjk0it3/tWDsuyFSR8y5aobf1cWOzS4BznLO3eBbvgo43jl3W6Vt7vbV8HczGwG8APR1ztXYwSUlJcWlpqbut2716tX06tXLH4dRaxs3buTcc89lxYoVAa3jSDSE8yciteAcfP4YfPowdBoBv3gNfvg4NPpuSNgys0XOuZpv5nYY/Nk+mQ50rLTcwbeusuuBswCcc/81s1igJaCmBhGRyooLvLvSLpsB/cfC+U95Q53Vd0OknD8vPy0EuptZVzOLBsYCc6tsswkYBWBmvYBY4OiuLwVIly5dgrKVRkSCQO5OmHK+F2hG3gcXPRd8924RqQd+a6lxzhWb2W3AfLzh2i8651aa2YNAqnNuLvAb4Hkz+zVep+Frnb+uh4mIBKOMNTDtUm/eoJ+/DH0uCnRFIg2WX7vH++45M6/KuvsrPV8FaMiOiEh1fvgEZl7rtcpcOw86DAl0RSINmj8vP4mIyJFa+AK8eonX+ffGTxRoRGpBNzIQEWlISku8mZz/9wx0Hw2XvODd7VdEDkktNUFo6dKlzJtXcVXv7bffpn///gwcOJCUlBS+/PLL8u1GjBhBnz596N+/PzNmzAhUySJSGwV74fXLvUAz/Fdw2XQFGpHDEJ4tNYc7q+1hcs7hnCPiULPSHqGlS5eSmprKz372MwBGjRrF+eefj5mxbNkyLr30UtasWUPjxo2ZOnUq3bt3Z8uWLQwZMoTRo0eTlJTkl7pE5ChkbobpYyFjNZzzBAy9PtAViQSd8GupKZvVNmsz4Lyv79zhrT8KGzdupGfPnlx99dX07duXhx56iKFDh9K/f38eeOABwJuB+5xzzmHAgAH07du3vOWkS5cuPPDAAwwePJh+/fqxZs2a8u2vu+46hg0bxqBBg3j77bcpLCzk/vvvZ8aMGQwc
OJAZM2YQHx9fPl9Ubm5u+fMePXqUT6fQrl07WrduzdHekVlE/CBtETw/EjI3wRVvKNCIHKHQa6l5/17Ytrzm19MWQknB/uuK8uDt22DRlOrf06YfnP1o9a9Vsm7dOqZMmUJ2djZvvvkm3377Lc45zj//fD7//HN27NhBu3bteO+99wDIysoqf2/Lli1ZvHgxzzzzDI8//jiTJ0/m4YcfZuTIkbz44otkZmYybNgwTj/9dB588EFSU1N5+umny98/e/ZsJkyYQEZGRvn+K/v2228pLCzUhJciDc3KOTD7JohPhmvegdbHBboikaAVfi01VQPNodYfhs6dOzN8+HA+/PBDPvzwQwYNGsTgwYNZs2YN69ato1+/fixYsIB77rmHL774gsTExPL3jhkzBoAhQ4awceNGAD788EMeffRRBg4cyKmnnkp+fj6bNm2q9rMvuugi1qxZw5w5c/jDH/6w32tbt27lqquu4qWXXvLbJTEROUzOweePwxvXQNuB3ggnBRqRoxJ6LTWHalF5sq/v0lMViR3hlwe2cByOJk2aAF6fmgkTJnDTTTcdsM3ixYuZN28e9913H6NGjeL++73b9sTEeHcHjYyMpLi4uHw/b731Fj177j+D+DfffFNjDSeffDI//vgjO3fupGXLlmRnZ3POOefw8MMPM3z48KM6PhGpI8UF8M6d8N106P8LOO8piIo99PtE5KDC78/2Ufd7s9hWVsez2o4ePZoXX3yRnJwcANLT08nIyGDLli00btyYK6+8kvHjx7N48eJD7udf//oXZTdZXrJkCQAJCQns3bu3fLv169eXb7N48WIKCgpo0aIFhYWFXHTRRVx99dVccskldXZ8InIUcnfB1Au9QHPa7+GifyvQiNSR0GupOZSyUU5+HP105plnsnr1akaMGAFAfHw8r776KuvXr2f8+PFEREQQFRXFs88+e9D9/OEPf+Cuu+6if//+lJaW0rVrV959911OO+208stSEyZMYOPGjUydOpWoqCji4uKYMWMGZsbMmTP5/PPP2bVrFy+//DIAL7/8MgMHDqyzYxWRw7Dje2/Kg+wtcMmL0PfiQFckElIs2KZaSklJcampqfutW716Nb169QpQRcFP50+kHvzwKcy8BhpFw9jp0HFooCsSaRDMbJFzLqUu9hV+l59EROpb6kvw6sWQ2N7rEKxAI+IX4Xf5SUSkvpSWwIL74b9PQ7czvEtOsU0DXZVIyAqZUOOcK7/pnNResF1+FAkaBTnw1g3w/fsw7CYY/ReIDJkfuSINUkh8h8XGxrJr1y5atGihYHMYnHPs2rWL2FiNvBCpU1lpMG0sZKyCnz0Ow24MdEUiYSEkQk2HDh1IS0vTFABHIDY2lg4dOgS6DJHQkb4Ipl/m3an8ipnQ7fRAVyQSNkIi1ERFRdG1a9dAlyEi4W7V2zDrJohvBVe/Da01qlCkPmn0k4jI0XIOvvg7zLzamyvuhk8UaEQCICRaakREAqa40DflwTToewlcMFF3CBYJEIUaEZEjtW83zLgSfvoKTp0Ap9wDGqwgEjAKNSIiR2LnOnjt596UBxe/AP00v5pIoCnUiIgcrh8/g5lXQUQUXPsudBwW6IpEBHUUFhE5PIumwKtjIKGdb8oDBRqRhkItNSJSe8tm+nWG+wattAQ+egC+/hccOwp+/hLEJga6KhGpRKFGRGpn2Ux45w7vpnIAWZu9ZQj9YFOQA7NuhLXzYOiNcNajmvJApAHSd6WIvzWE1o2SIijMgcJc71GQs/9ybZ5vWQKlRfvvtygPPpjgXYJJ7AQRIXhFOysdpv8Ctq+Es/8Gx98U6IpEpAYKNSL+dLitG85BcYEvSOytReio5WslhbWvuVEsRDfxPeIrnlcNNGX27YR/DoCoJtD6OO+mc617VzziWwfvMOctS7wpDwpy4LIZ0OPMQFckIgdhwTZLc0pKiktNTQ10GSK180Qvb8hvVY1ioX1K9SHEldR+/1FNqg8g0U0gJmH/
5f1er+F5VJOaL6s82dcLZVU1aQ0jfw8Zq70JHDNWQ26ledjimvsCTq9KgacXxCXV/jgDYdVcmDUOmrSCy1+H5D6BrkgkJJnZIudcSl3sSy01InWhuAB2rPUuUWxf4fu6EnIzatg+3/san3x4oaPyclTj+r3cM+r+/VudAKLiYPTDB7Y65eyAHath+6qKoPPd617rU5mm7Q8MOi17QnTj+jmemjgHX/0DPvqjFzwvm+61NolIg6dQI3I4nIO9Ww8MLzu/h9Jib5vIGO8XdPczYc27kJ954H4SO8Iv36vf2o9WWXCpTf+g+Fbeo+vJFeuc896XsRoyVla07Gz4AkoKfBsZND9m/6DTuje0OBYio/x+iBQXwru/hqWvQp8xcOEzXnATkaCgUCNSk6I87xdvWXApCzF5uyu2SezoXZboeTYk9/UezY+puISz7JTqWzdG3V+/x1JX+l965J2czSCpo/eo3DelpBj2bKho0dnuCzxr54Er9baJiIKWPSC59/6Bpy47J+/bDTOugp++9KY7OOXe0Oz4LBLC/BpqzOws4J9AJDDZOfdoldefBE7zLTYGWjvnGviFdgk5znl9Raq2vuxaX/FLNaqx94u013neLMzJfbzlQ/ULOZzWjXAV2QhadvcevS+oWF+U77WAVe6rs+l/sPyNim3265zcpyLwHG7n5J3rYdql3v+DMc/r30ckSPmto7CZRQLfA2cAacBC4DLn3Koatr8dGOScu+5g+1VHYTkqBTm+1oAVlULMKijIqtimWZeKVpfkPt6jWVf91d5Q5GfDjjUVQSdjlfdvuG9nxTZVOycn94FWx+0fQisPtQcvuF41CzoNr9/jEQlzwdJReBiw3jn3I4CZvQ5cAFQbaoDLgAf8WI+Ek9JSyNy4/6WjbSu8yxxlYpp6v+z6/9wXXvp6vwBjEgJWttRCbFPvvjhVpyfI2bF/0DlY52SLgB//s/9Qd1cCmZsUakSCmD9DTXug8vjPNOD46jY0s85AV+CTGl4fB4wD6NSpU91WKQ3T4dywLj/L+0u9autLUa5vA4MW3aDtABh4RUXrS1Kn4L1/ihwovhXEnwLHnFKxruzSYuWgk7EKti0/8P3F+d7/OV16Eik3Z0k6j81fy5bMPNolxTF+dE8uHNQ+0GXVqKF0FB4LvOlc9TfocM5NAiaBd/mpPguTAKjphnWuFNoPqQgv23xfszZVvDc2yevzMviqivDSqlfghwlLYJh54TWpE/QYXbH+j0lANT9Kyi5FiQizF6cxYfZy8ou8voXpmXlMmOX9QdBQg40/Q0060LHScgffuuqMBW71Yy0STD5+cP/RQuAtz650e3qL9EbDdBwGKb/0Lh216QsJbdX6IoeW2KH6Gwkmdqj/WkT8rLTUsTe/mMy8QjL3FZGZV0TmvkKy8oq85X1FZOYVkrWviD37CsnMKyJrXxG7cg+8E3leUQmPzV8blqFmIdDdzLrihZmxwOVVNzKz44BmwH/9WIsEk4P9tXzhs16AadUTGsXUX00SWmq6kWCwDrWXBscfl22KS0rJzi8ms1LwKA8q+4rK15cFl6yy7fKKONiYoPiYRiTGRZHUOIpmjaNpmxRHUlwUr32zqdrtt2TmVbu+IfBbqHHOFZvZbcB8vCHdLzrnVprZg0Cqc26ub9OxwOsu2OZrEP/Iy/R+uRTtO/C1xI4w8IBcLHL4NNRe/GjOknQmzFpOXpHXo6LqZZvC4lKy8orIqhxIfK0nmZWCSnlLim95b37xQT+3aWwjkhpHk9Q4isS4KDo3b0xS4yiS4qJIbBxNki+4eK9XbBcVWf3Izv+s3UF6NQGmXVLDvSGl5n6ShmPdRzD3du+OvRGRFXfoBS/onPeUfumISIO0r7CYrVn5bMvK59Zpi8ncd+AEsJFmxEZFkFtY8/xuEYav1SSaxLgomjWueF4WUJIaR5NY6XlSXBRN46KIjKjbS+9VwxlAXFQkj4zpV6eXn4JlSLdI7eRnw4f3weIp3r1E
xr7m3fhOf0WLHFKwjU4JRnvzi9iWlV8eWrZm5bMtO48tmWXLeWQfohUFoMQ5xg7rVN5isl/rSZwXVBJiGhFRx+HkSJX9Pwqm/19qqZHA+vEzePtWyE6HE+6AUydAVGygqxIJCvX1l3Socs6RnVfM1uy8/QLL1sw8tmVXhJicggMDS8v4GNomxtImMXb/r03juGvGErZnFxzwnvZJcXx178j6OLSgopYaCX4FOd4syAuf9+4hc938A2+mJiIH9dj8tfsFGvBGp0yYtZxvNuwmIbYR8TGNqnyNIr7K+sbRkViIjRp0zrFnXxFbs/LYlpXPlqx8tmVVhJeyAFP1/JlB64QY2iTG0a1VPCd1a1kptMTRNjGW1k1jiGkUWeNnTzi7V7Vhc/zonn47XvEo1Ej9++lrmHML7PkJhv8KRv5B95EROQI1jULJKyphwart7M0voqC49JD7iTBoEtOIhJhG5YEnPta7FBJfaV1CbM3BKD62EU2iGx1Vv47aXkorLXXsyi0sv/SztfzSkC+0+FpZCqsce2SEkZwQQ5vEWHq1bcppx7Wu0toSR+uEmBo7ztZWMF62CRW6/CT1pyjP6yfzv2ehWWe44BnocmKgqxIJSrkFxQx5aAH51YSWypc5CotLyS0oJqegmL353tecgqKK5/lVXitbLigmJ79iu30H6dxaWZPoyEMGI++rF4zKgtS3G3bz1Mfr9gth0ZHGOf3a0qpp7H6hZXt2PkUl+//uioo0kptWhBPvUlDFpaF2SXG0jI+p8860cvR0+UmCz+aFMOdmrwPw0Bvg9D9BTHygqxIJStuy8rl+ykLyi0uJirT9fsFXvcwR3SiC6EbRNGsSfVSfWVLqfIGoLPjUPhhtz86vWF9YfNB7plRWWOKYvXQL0Y0iykNKSudmFaElMZZ2iXG0SYylRZPoBtPBVgJHoUb8qygf/vMX+Ppf3mSCV78Nx5wa6KpEgtbKLVlc/3Iqe/OLeOnaoWTlFdXLZY7ICCMxzruvydEoLXXsKyo5IBhd9cK31W5vwNqHzgq5Pj/iHwo14j/pi72+MzvWwOCr4cyHvRmWReSIfLx6O7dPX0JiXBRv3HwCvdt530/B1FcjIsK8y1ExjYCKkY7tk+JqvNGbAo3U1tH1hhKpTnEhfPJnmHy6dw+aK96C8/+lQCNyFF76agM3Tk3l2FbxvH3rieWBJlSMH92TuKj9RxRpxJAcLrXUSN3ausxrndm+AgZcDmc9AnFJga5KJGgVl5Ty0LurmPLfnzizdzL/GDuQxtGh96NbI4akLoTed4YERkkRfPkkfPZXiGsOY6fDcT8LdFUiQS2noJjbpy3m07U7uPH/unLv2b1CevTOhYPaK8TIUVGokaOXsRpm3wxbl0LfS+Bnj0Hj5oGuSiSobcnM47qXF7IuI4eHL+rLFcd3DnRJIg2eQo0cudIS+Pop+PQvEJMAl06F3hcEuiqRoLc8LYvrpywkr7CEl64dysk9WgW6JJGgoFAjR2bnOq/vTNpC6HU+nPMExOsHr8jRmr9yG3e9vpTmTaJ55Zbj6dkmIdAliQQNhRo5PKUl8M1z3p2BG8XCxS9A34u9CVNE5Ig555j8xQb+8v5q+ndI4vmrh9A6QZO7ihwOhRqpvd0/wpxfwab/Qo+z4bx/QEKbQFclEvSKSkp5YO5Kpn2zibP7tuGJSwcSF13zhIkiUj2FGjm00lJIfQEW3A8RUXDhszDgMrXOiNSB7Pwibn1tMV+s28ktpx7L+DN76nb/IkdIoUYObs9PMPc22PA5HDvKu4leooZcitSFzbv3cf2Uhfy4I5e/XtyPXwztFOiSRIKaQo1UzzlY9DJ8eJ+3fN4/YfA1ap0RqSNLN2dyw5SFFBSXMuW6YZzYrWWgSxIJego1cqCsdJh7O/zwMXQ9GS6YCEn6C1KkrsxbvpVfz1hK66YxvD5uON1aa4STSF1QqJEKzsHSafDBBCgtgp89DinXQ4SmCBOpC845nvvsR/76wRoG
d0ri+atTaBEfE+iyREKGQo149m6Dd+6C79+HTifAhROh+TGBrkokZBSVlHLf7BXMSN3MeQPa8dgl/YmN0ggnkbqkUBPunIPlb8K830JxPox+BI6/Wa0zInUoK6+IW15dxNc/7OL2kd349ek9NMJJxA8Uan1qe24AACAASURBVMJZzg5479ew+h3oMNQbqt2ye6CrEgkpm3bt45cvf8um3ft4/OcDuGRIh0CXJBKyFGrC1co58N7dULAXTv8TnHA7RKgpXKQuLfppD+OmplJc6njl+uMZfkyLQJckEtIUasLNvt3epaYVb0HbgXDRc9C6V6CrEgk573y3hd+88R3tEmN58dqhHNMqPtAliYS8WoUaM5sFvAC875wr9W9J4jdr5sE7d0LeHjjtPjjpLoiMCnRVIiHFOcfTn6zn7wu+Z2iXZvz7qhSaN4kOdFkiYaG2LTXPAL8EnjKzN4CXnHNr/VeW1Km8Pd4w7e+mQ3I/uGoWtOkX6KpEQk5hcSkTZi3nrcVpXDSoPY9e3I+YRrqsK1JfahVqnHMfAR+ZWSJwme/5ZuB54FXnXJEfa5Sjse4jb5qDnAw4+Xdw8nhopL8aRepa5r5CbnplEd9s2M1dp3fnzlHdMd2BW6Re1bpPjZm1AK4ErgKWAK8BJwHXAKf6ozg5AstmwscPQlYaRDWGolxodRyMnQbtBwe6OpGQtHFnLr98eSHpe/L4xy8GcuEgzY8mEgi17VMzG+gJvAKc55zb6ntphpml+qs4OUzLZsI7d0BRnrdclAsRjeCEOxRoRPzk2w27GfdKKga8duPxDO3SPNAliYSt2t5h7SnnXG/n3COVAg0AzrmUmt5kZmeZ2VozW29m99awzaVmtsrMVprZtMOoXar6+MGKQFOmtBj+80hg6hEJcbOXpHHl5G9o3jia2b86UYFGJMBqG2p6m1lS2YKZNTOzXx3sDWYWCUwEzgZ6A5eZWe8q23QHJgAnOuf6AHcdTvFSSXEBZG2u/rWstPqtRSTEOed4csH3/HrGdwzunMSsX51Al5ZNAl2WSNirbai50TmXWbbgnNsD3HiI9wwD1jvnfnTOFQKvAxdU3S8w0bc/nHMZtaxHKsvZAVPOr/n1RN3BVKSuFBSX8OsZS/nnx+u4ZEgHpl53PEmN1flepCGobaiJtErd+H2tMIf6Lm4PVG46SPOtq6wH0MPMvjKz/5nZWdXtyMzGmVmqmaXu2LGjliWHie2rYPJI2LoUho2DqLj9X4+Kg1H3B6Y2kRCzO7eQKyd/w5ylWxg/uiePXdKf6EaaJ02koajt6KcP8DoF/9u3fJNvXV18fne80VMdgM/NrF/lViEA59wkYBJASkqKq4PPDQ3ffwhvXgfRTeCX86D9EG8Op7LRT4kdvEDT/9JAVyoS9H7YkcN1Ly9ka1Y+/7psEOcNaBfokkSkitqGmnvwgswtvuUFwORDvCcd6FhpuYNvXWVpwDe++9xsMLPv8ULOwlrWFZ6cg/89Ax/eB8l94bLXIdHXCNb/UoUYkTr23x92cfOri2gUYUy/cThDOjcLdEkiUo3a3nyvFHjW96ithUB3M+uKF2bGApdX2WYO3s38XjKzlniXo348jM8IPyVF8N5vYPEUOO5cGDPJa6kREb94I3Uz/2/2cjq3aMJL1w6lY/PGgS5JRGpQ2/vUdAcewRvFFFu23jl3TE3vcc4Vm9ltwHwgEnjRObfSzB4EUp1zc32vnWlmq4ASYLxzbtcRH02o27cbZl4NG7+A//uNN39ThK7ni/hDaanj7wvWMvHTHzixWwueuWIIiXGaK02kIavt5aeXgAeAJ4HT8OaBOuRvU+fcPGBelXX3V3rugLt9DzmYnetg2i+8YdsX/RsGjA10RSIhK7+ohN++8R3vLtvK2KEdeejCvkRF6g8IkYautqEmzjn3sZmZc+4n4I9mtgjQsJr68MOn8MY1EBEF17wDnYYHuiKRkLUzp4Abp6ayZFMmE84+jnEnH6M5nESCRG1D
TYGZRQDrfJeU0oF4/5Ul5Ra+APPGQ8secPkMaNY50BWJhKx12/dy3ZSFZGQX8OwVgzm7X9tAlyQih6G2oeZOoDFwB/AQ3iWoa/xVlAAlxfDh7+Gb56D7mXDxCxDbNNBViYSsL9ft5JbXFhHTKJIZN41gYMekQ79JRBqUQ4Ya3432fuGc+y2Qg9efRvwpP8u7/8z6j2D4rXDmQxARGeiqRELKnCXpPDZ/LVsy80iMiyIrr4geyQm8cG0KHZpphJNIMDpkqHHOlZjZSfVRjAC7N8D0sbBrPZz3TxhybaArEgk5c5akM2HWcvKKSgDIzCsiwuCXJ3ZWoBEJYrW9/LTEzOYCbwC5ZSudc7P8UlW4+ulrmHEllJbAVbOh68mBrkgkZDjnyM4rZvvefB56d1V5oClT6uBfn/zA2GHqtyYSrGobamKBXcDISuscoFBTV5a8Bu/c6XUEvnwmtDg20BWJBI3cgmK2Z+ezPbuAjL35bMvynm/fm0+Gb/327HwKiksPup8tmXn1VLGI+ENt7yisfjT+UloKH/8RvvondD0FLp0CcboFuwh494vZsbegPLB4X/Mrlvfmk5FdQE5B8QHvbRwdSZumsbRuGsOgTkkkN42ldUIMyU1j+dM7K9mZU3jAe9olxR2wTkSCR23vKPwSXsvMfpxz19V5ReGkIAdmjYO170HKdXD23yBSdyyVhqty59p2SXGMH92TCwe1P+z9FJWUsjOnoDyoZGTns61ScMnwBZbMfUUHvDe6UQTJTWNIToilV5umnNLDCypl65ITY0luGkt8TM0/3kpK3X59agDioiIZP7rnYR+LiDQctb389G6l57HARcCWui8njGSlwbSxkLHSCzPDxoFu8CUNWNXOtemZeUyYtRygPNiUljp25RZ6wWRv1daVihaXXbkFuCp/JkVGGK0TYmjdNJbOLRozrGtzkpt6y22axpYHl8S4qKO+GV5ZvXUR0ESk4TBX9SdLbd7k3YjvS+fcCXVf0sGlpKS41NTU+v7YupW2CF6/DIry4JKXoPvpga5I5JBOfPRj0jPzD1gf2yiCnm2bkpGdz469BRSX7v8zxQxaNInxWlJ8waR1ghdS2iRWPG/eJJrICAV7kXBjZouccyl1sa/attRU1R1oXRcFhJ3lb8Lbt0J8Mlw9F1ofF+iKRPbjnGNbdj7rtuewLiOH9Rl7Wbc9p9pAA5BfXErT2EZ0b92S5KYxvn4sFS0rLeNjNG+SiNSL2vap2cv+fWq2Aff4paJQ5Rz85xH47K/QaQT84lVo0jLQVUkYKy11bMnK84LL9hzWZewtf763Usfb5k2i6d46nibRkeQWlhywn/ZJcbxy/fH1WbqISLVqO/opwd+FhLSiPJhzC6ycDQMuh/P+AY1iAl2VhInSUkfanrzy0LLOF2DWZ+Swr1JIaZUQQ/fW8YwZ3J5uyQl0bx1P99bxtIj3/q9W7VMD6lwrIg1LbVtqLgI+cc5l+ZaTgFOdc3P8WVxI2LsNpl8GW5bA6X+CE+9Uh2Dxi5JSx6bd+1i3vSy8eF9/2JFDflHF/VnaNI2le3I8vxjake6tE+ieHE+3VvE0axJ90P2rc62INHS16ihsZkudcwOrrFvinBvkt8pqEFQdhbd+5wWavEwYMwl6nRvoiiQEFJWU8tOufazP2Mv3vn4v67bv5ceduRRWurlc+6Q4uvlaW7onx9OtdQLdWseTGKfbBohIwxGIjsLV9fI70k7G4WH1uzDrRohrDtd9AG37B7oiCTKFxaVs3JXLuu05fL/du1y0LmMvG3bmUlRS8cdIx+ZxdG+dwCk9WnkhJtkLLwe7T4uISCiq7U+9VDN7ApjoW74VWOSfkoKcc/DVP+CjP0H7wTB2OiQkB7oqCaBD3bAuv6iEDTtzKy4Z+fq8bNy1jxLf8Ggz6Ny8Md1aJzCqV7Kvv0sCx7ZuQuNohRcREaj95acmwB+A0/FGQS0AHnbO5R70jX7QoC8/FRd48zd9Nx36XgwXTIQo
3XY9nFXXuTYq0jilRyvMjPUZOfy0K5eyW7tERhidWzQuDy3eZaN4jm0VT2xUZICOQkTEf+r98pMvvNxbFx8YsnJ3wutXwOb/wakT4JR71CFYeGz+2gNmgy4qcXy0OoPurePp1TaB8wa0K+/30rVlE2IaKbyIiByJ2o5+WgD83DmX6VtuBrzunBvtz+KCRsZqmHYp5GTAJS96rTQi1DzrswEL7j6lfosREQlxtb3NZ8uyQAPgnNuD7ijsWbcAJp/hXXq6dp4CjeyneQ3DpDUbtIhI3attqCk1s05lC2bWhWpm7Q4rzsH/nvVaaJp3gRs/hQ5DAl2VNCDrM3LIyS864CqkblgnIuIftR028XvgSzP7DK/l/P+AcX6rqqErKYJ542HRS3Dcud49aKKbBLoqaUCy8ooYNzWVhLgobj2tG5O/2KAb1omI+FltOwp/YGYpeEFmCTAHqL6zQKjbtxveuAY2fA4n/RpG3g8RmqxPKpSUOm6fvoTNe/Yx7cbhDO3SnF+e2DXQZYmIhLzadhS+AbgT6AAsBYYD/wVG+q+0Bmjneu9yU+YmuPBZGHh5oCuSBujR91fz+fc7eGRMP4Z2aR7ockREwkZtmxjuBIYCPznnTgMGAZkHf0uI+fEzmDwK8jPhmncUaKRaby1K4/kvNnDNiM5cNqzTod8gIiJ1prahJt85lw9gZjHOuTVA+PR0TH0JXh0DCW3hxk+g84hAVyQN0JJNe5gwezkjjmnBfef2DnQ5IiJhp7YdhdN8M3PPARaY2R7gJ/+V1UCUlsD838M3z0K3M7x70MQ2DXRV0gBtz87nplcWkdw0hmeuGExUpPpZiYjUt9p2FL7I9/SPZvYpkAh84LeqGoL8bHjzOli/AIb/Cs54CCI1x44cKL+ohHGvLCK3oJhXrj+RZjXcm0ZERPzrsH9LO+c+80chDcqejTBtLOxaB+c+CSnXBboiaaCcc/y/Wcv5bnMmk64aQs82CYEuSUQkbPm16cHMzgL+CUQCk51zj1Z5/VrgMSDdt+pp59xkf9ZUrWUz4eMHISsNmrSCwlyvVebKWXCMbmUvNZv8xQZmLUnn7jN6cGafNoEuR0QkrPkt1JhZJDAROANIAxaa2Vzn3Koqm85wzt3mrzoOadlMeOcOKPLddic3AzA49SEFGjmo/6zN4JH3V/Ozfm24fWS3QJcjIhL2/NmbcRiw3jn3o3OuEHgduMCPn3dkPn6wItCUc/DtvwNSjgSHH3bkcPv0JfRs05THfz4A04zsIiIB589Q0x7YXGk5zbeuqovNbJmZvWlmHavbkZmNM7NUM0vdsWNH3VaZlXZ46yXsZeUVcePUVKIjI3j+6iE0jlYHchGRhiDQ407fAbo45/oDC4Ap1W3knJvknEtxzqW0atWqbitI7HB46yWslZQ67pi+hE279vHslUPo0KxxoEsSEREff4aadKByy0sHKjoEA+Cc2+WcK/AtTgbqf5rrUfdDVNz+66LivPUiVfztgzV89v0OHrygL8O6agoEEZGGxJ+hZiHQ3cy6mlk0MBaYW3kDM2tbafF8YLUf66le/0vhvKcgsSNg3tfznvLWi1Qye0ka//78R64a3pnLj9cUCCIiDY3fOgM454rN7DZgPt6Q7hedcyvN7EEg1Tk3F7jDzM4HioHdwLX+queg+l+qECMHtXRzJve8tZzhxzTn/vM0BYKISENkzrlA13BYUlJSXGpqaqDLkDCSkZ3PeU9/SVRkBHNvO4nmumOwiEidMbNFzrmUutiXhm2IHETZFAh784t565YTFGhERBowhRqRGjjn+H+zl7N0cybPXTmEXm01mamISEMW6CHdIg3WC19uYNbidO46vTtn9dUUCCIiDZ1CjUg1Pvt+B3+Zt5qz+7bhjpHdA12OiIjUgkKNSBU/7sjh9mmL6ZGcwOM/H0BEhKZAEBEJBgo1IpVk5xdxw9RUGkVG8PzVKTSJUbczEZFgoVAj4lNS6rjTNwXCM1cMpmNzTYEgIhJM
9GeoiM9j89fy6dod/PnCvgw/pkWgyxERkcOklhoRYM6SdJ777AeuOL4TVw7vHOhyRETkCCjUSNhblpbJPW8tY1jX5jxwXp9AlyMiIkdIoUbCWkZ2PuOmLqJlfAzPXjGY6Eb6lhARCVbqUyNhK7+ohJteXURWXhFv3XICLeJjAl2SiIgcBYUaCUvOOe6bs4IlmzJ59orB9G6nKRBERIKd2tolLL341UbeXJTGnaO6c3a/toEuR0RE6oBCjYSdz7/fwcPvrWJ0n2TuHKUpEEREQoVCjYSVDTtzuc03BcITlw7UFAgiIiFEoUbCxt78Im6cmkpkhGkKBBGREKSf6hIWSkodd76+lI07c3nl+uM1BYKISAhSS42Ehcc/XMsnazJ44LzejDhWUyCIiIQihRoJeW8vTefZ//zAZcM0BYKISCjT5acQM2dJOo/NX8uWzDzaJcUxfnRPLhzUPtBlBczytCx+9+YyhnVpzp/O74OZOgaLiIQqhZoQMmdJOhNmLSevqASA9Mw8JsxaDhCWwSZjbz43Tk2lZXwMz1ypKRBEREKdfsqHkMfmry0PNGXyikp4bP7aAFUUOAXFJdz8ijcFwqSrh9BSUyCIiIQ8hZoQsiUzr8b1zrl6riZwnHPcN3sFizdl8vjPB9CnXWKgSxIRkXqgUBMivlq/E2roLuKAkX//jH99vI7Nu/fVa12B8NJXG3ljURp3jOzGOf01BYKISLhQqAlyzjle/moDV7/4LckJMcRU6TcSGxXB2GEdaZ0Qw98XfM///e1Txk76LzNTN7M3vyhAVfvPl+t28vC81ZzZO5m7Tu8R6HJERKQeWbBdlkhJSXGpqamBLqNBKCwu5YG5K5j+7WZO79WaJ38xkI9XZ9Q4+mnz7n3MWZLOW4vT2LhrH7FREZzVpw1jBnfgxG4tiQzyKQM27szlgolfkdw0hlm/OpF43TFYRKTBM7NFzrmUOtmXQk1w2pVTwC2vLubbjbv51anH8tsze9Z6HiPnHIs3ZTJrcRrvfLeF7PxikpvGcOGg9lw8uAM9khP8XH3d25tfxJhnvmZHTgFzbz2JTi10x2ARkWCgUBPmoWb11mxumJLKzpwC/nZJfy4YeOTDtfOLSvhkTQazFqfx6dodlJQ6+rZvysWDO3D+gHa0CIJRQ6WljnGvpPLp2h28ct0wTujWMtAliYhILSnUhHGo+WDFNu6euZSE2EZMuiqFAR2T6mzfO3MKmLt0C7OWpLEiPZtGEcapPVtz8eD2jOzVmphGkXX2WXXpsflrmPjpD/zp/D5cc0KXQJcjIiKHoS5DjV87HZjZWcA/gUhgsnPu0Rq2uxh4ExjqnAvfxHIQzjme/mQ9f1/wPQM6JjHpqiEkN42t089oGR/DdSd15bqTurJ2215mLU5j9pJ0Plq9ncS4KM4b0JYxgzswqGNSg7kz7zvfbWHipz8wdmhHrh6hKRBERMKZ31pqzCwS+B44A0gDFgKXOedWVdkuAXgPiAZuO1SoCceWmrzCEsa/+R3vLtvKhQPb8ejF/YmNqp9Wk5JSx5frdzJrcRrzV24jv6iUY1o2Yczg9lw4qD0dmgWu78qK9Cwuee5r+rVP5LUbhuuOwSIiQShYWmqGAeudcz8CmNnrwAXAqirbPQT8FRjvx1qC1tasPMZNXcSKLVnce/Zx3HTyMfXaShIZYZzSoxWn9GjF3vwi3l++jbcWp/H4h9/z+IffM/yY5lw8uANn92tbr6ONduwt4MapqTRvHM2zVw5RoBEREb+GmvbA5krLacDxlTcws8FAR+fce2ZWY6gxs3HAOIBOnTr5odSGafGmPYybuoj8ohImX53CqF7JAa0nITaKS4d25NKhHdm8ex+zl6Qza3Ea499cxv1vr+Ssvm0YM7g9Jxzr3+HhBcUl3PzqIvbsK+TNm0/QFAgiIgIEcEJLM4sAngCuPdS2zrlJwCTwLj/5t7KG4a1FaUyYtZw2ibFMu/H4BjfMumPzxtwxqju3j+zG4k2Z
vLU4jXe/28LsJem0aRrrGx7enu51XLdzjvvnrGTRT3t4+vJB9G2vKRBERMTjz1CTDnSstNzBt65MAtAX+I/vckobYK6ZnR/OnYVLSh1//WANkz7/kRHHtOCZKwbTrEl0oMuqkZkxpHMzhnRuxv3n9uaTNRm8tSiN57/4kec++4F+7RO5eHB7zquj4eFTvt7IjNTN3HZaN87t364OjkBEREKFPzsKN8LrKDwKL8wsBC53zq2sYfv/AL8N547C2flF3DF9Cf9Zu4Orhnfm/vN6ExUZnH1FyoaHv7U4jZVbKoaHXzKkPacdd2TDw79av5OrX/yW03q2ZtJVQ2p9s0EREWm4gqKjsHOu2MxuA+bjDel+0Tm30sweBFKdc3P99dnBaMPOXG6YspCfdu3jzxf25crhwT08ufLw8DXbspm9OL18eHhS4yjO69+OMYPbM7CWw8N/2pXLrdMWc0zLJjz5iwEKNCIicgDdfK8B+HLdTm6dtpgIg2euGMKIY1sEuiS/ONjw8IsGd6B9Uly178spKGbMM1+xPbuAubedSOcWTeq5chER8RfdUThEQo1zjpe/3sif31tNt1bxPH91StjMWVR5ePg3G3YDMOKYFowZ3J6z+7Xlo1XbyyfmjGkUQUFxKa/ecDwnagoEEZGQolATAqGmsLiU+99ewesLN3N6r2T+MXZg2M4qXXl4+MZd+4iKMEodlFT6vxkVaTx2yYDyGcdFRCQ0KNQEeaipPMP2racdy2/OqP0M26GsbPbwq174hn2FJQe83j4pjq/uHRmAykRExF+CoqOwVG/VlmxunOrNsP3PsQOPaobtUFM2PDyvmkADsCUzr54rEhGRYBKc44WD1AcrtnHJc19TXFrKzJtGKNDUoF0NHYZrWi8iIgIKNfXCOcdTH6/j5lcX0T05gXduO4kBHZMCXVaDNX50T+KqTNgZFxXJ+NE9A1SRiIgEA11+8rO8whJ++8Z3vLd8KxcNas8jY/rV2wzbwaqsM3DZ6Kd2SXGMH91TnYRFROSgFGr8aEtmHjdOTWXV1uyAzLAdzC4c1F4hRkREDotCjZ8s+mkPN73izbD9wjUpjDwusDNsi4iIhDqFGj94I3Uzv5+9grZJsUy/8fg6n6laREREDqRQU4dKSh2PzFvN5C83cMKxLZh4ecOeYVtERCSUKNTUkaw8b4btz77fwTUjOnPfucE7w7aIiEgwUqipAxt25nL9lIVs2rWPv1zUj8uP7xTokkRERMKOQs1R+mLdDm59bTGREcarNxzP8GNCc4ZtERGRhk6h5ghVnmG7e2tvhu2OzcNjhm0REZGGSKHmCBQWl/KHOSuYkbqZM3on8+QvwneGbRERkYZCv4kP086cAm55dRELN+7httO6cfcZPTTDtoiISAOgUHMYKs+w/dRlgzh/QLtAlyQiIiI+CjW19MGKrfx6xnckxkXxxs0j6N9BE1KKiIg0JAo1h+DNsL2eJz/6noEdk5h01RBaN40NdFkiIiJShULNQewrLGb8G8t4b/lWxgxqz180w7aIiEiDpVBTg/TMPMb5ZtiecPZxjNMM2yIiIg2aQk01Fv20m5teWURBUSkvXjOU045rHeiSRERE5BAUaoA5S9J5bP5atmTmkdg4ir15RXRo3pjXx6XQrbVm2BYREQkGYR9q5ixJZ8Ks5eQVlQCQua+ICIObTj5GgUZERCSIhP000o/NX1seaMqUOpj46Q8BqkhERESORNiHmi2ZeYe1XkRERBqmsA817ZLiDmu9iIiINExhH2rGj+5JXJV7z8RFRTJ+dM8AVSQiIiJHIuw7Cl84qD1A+eindklxjB/ds3y9iIiIBIewDzXgBRuFGBERkeDm18tPZnaWma01s/Vmdm81r99sZsvNbKmZfWlmvf1Zj4iIiIQuv4UaM4sEJgJnA72By6oJLdOcc/2ccwOBvwFP+KseERERCW3+bKkZBqx3zv3onCsEXgcuqLyBcy670mITwPmxHhEREQlh/uxT0x7Y
XGk5DTi+6kZmditwNxANjKxuR2Y2DhgH0KlTpzovVERERIJfwId0O+cmOueOBe4B7qthm0nOuRTnXEqrVq3qt0AREREJCv5sqUkHOlZa7uBbV5PXgWcPtdNFixbtNLOfjrI2f2gJ7Ax0EQ2EzoVH58Gj8+DReaigc+HRefDU2Y3h/BlqFgLdzawrXpgZC1xeeQMz6+6cW+dbPAdYxyE45xpkU42ZpTrnUgJdR0Ogc+HRefDoPHh0HiroXHh0HjxmllpX+/JbqHHOFZvZbcB8IBJ40Tm30sweBFKdc3OB28zsdKAI2ANc4696REREJLT59eZ7zrl5wLwq6+6v9PxOf36+iIiIhI+AdxQOIZMCXUADonPh0Xnw6Dx4dB4q6Fx4dB48dXYezDndGkZERESCn1pqREREJCQo1IiIiEhIUKg5CDN70cwyzGxFpXXNzWyBma3zfW3mW29m9pRv8s5lZja40nuu8W2/zsyCboSXmXU0s0/NbJWZrTSzO33rw+pcmFmsmX1rZt/5zsOffOu7mtk3vuOdYWbRvvUxvuX1vte7VNrXBN/6tWY2OjBHdHTMLNLMlpjZu77lcD0PGytNzJvqWxdW3xsAZpZkZm+a2RozW21mI8LtPJhZT9//g7JHtpndFW7nAcDMfu37ObnCzKb7fn76/2eEc06PGh7AycBgYEWldX8D7vU9vxf4q+/5z4D3AQOGA9/41jcHfvR9beZ73izQx3aY56EtMNj3PAH4Hm+S0rA6F77jifc9jwK+8R3fTGCsb/1zwC2+578CnvM9HwvM8D3vDXwHxABdgR+AyEAf3xGcj7uBacC7vuVwPQ8bgZZV1oXV94bvGKYAN/ieRwNJ4XgeKp2PSGAb0DnczgPeNEkbgDjf8kzg2vr4GRHwg2/oD6AL+4eatUBb3/O2wFrf838Dl1XdDrgM+Hel9fttF4wP4G3gjHA+F0BjYDHefGY7gUa+9SOA+b7n84ERvueNfNsZMAGYUGlf5dsFywPvDuEf483X9q7vuMLuPPjq3siBoSasvjeARLxfYhbO56HKsZ8JfBWO54GKuR+b+77n3wVG18fPCF1+OnzJzrmt2vWpSwAABYdJREFUvufbgGTf8+om8Gx/kPVBydcsOAivlSLszoXvkstSIANYgPeXQ6Zzrti3SeVjKj9e3+tZQAtC4DwA/wB+B5T6llsQnucBwAEfmtki8ybfhfD73ugK7ABe8l2SnGxmTQi/81DZWGC673lYnQfnXDrwOLAJ2Ir3Pb+IevgZoVBzFJwXHcNmTLyZxQNvAXc557IrvxYu58I5V+KcG4jXUjEMOC7AJdU7MzsXyHDOLQp0LQ3ESc65wcDZwK1mdnLlF8Pke6MR3qX6Z51zg4BcvMss5cLkPADg6ytyPvBG1dfC4Tz4+gxdgBd22wFNgLPq47MVag7fdjNrC+D7muFbX9MEnoc7sWeDZGZReIHmNff/27u7ECvqMI7j3x8YaiprkXddqLAhFGmRoWghJYIhUSJYCUV2UUEFRoQlBN0JRuBFN4EQhHRjal6ERtmLGKlhur6DYNRSmvQimRRiTxf/Z3aHZVPcN9eZ3weGPfN6zjycmX3Of/4zT8TmnNzKWABExB/A55Qm1MmSqqdz1/epZ39zfgfwK9d/HOYBD0v6nlKI9gFgPe2LA9Dzq5SI+AXYQkl223ZsdAPdEbEnxzdRkpy2xaGyGNgfEWdyvG1xWAicioizEXER2Ew5bwz7OcJJzdXbRm+Nqqco/Uuq6U9mb/Y5wLlsbtwBLJJ0U2avi3LadUOSgA3AsYh4uzarVbGQNEXS5Hw9ntKv6BgluVmWi/WNQxWfZcDO/JW2DXgse/xPAzqBvSOzF4MXEa9FxK0RMZXSxL4zIlbQsjgASJogaVL1mvKdPkzLjo2IOA38KKmqtvwgcJSWxaHmcXovPUH74vADMEfSjfn/o/o+DP854lp3KBrNA+VL+TOl4GY38AzlOt9nlIrinwI357IC3qH0
sTgE3FPbzkrgZA5PX+v9GkAc5lOaS7uAAzk81LZYAHcC32UcDgNv5PTpeaCdpDQ3j83p43L8ZM6fXtvWmozPCWDxtd63QcRkAb13P7UuDrnPB3M4AqzJ6a06NvLzzwK+zeNjK+WunTbGYQKllaGjNq2NcXgTOJ7nyvcpdzAN+znCZRLMzMysEXz5yczMzBrBSY2ZmZk1gpMaMzMzawQnNWZmZtYITmrMzMysEZzUmNmASfo6/06V9MQQb/v1/t7LzOz/+JZuMxs0SQuAVyJiyVWsMyZ668D0N/98REwcis9nZu3glhozGzBJ5/PlWuA+SQckrcrCn+sk7ZPUJenZXH6BpF2StlGeMIqkrVkM8khVEFLSWmB8bm9j/b3y6avrJB2WdEjS8tq2v5C0SdJxSRvzaaZIWivpaH6Wt0YyRmY2csZceREzsytaTa2lJpOTcxExW9JYYLekT3LZu4E7IuJUjq+MiN+y9MQ+SR9GxGpJL0QpHtrXUsrTa2cCt+Q6X+W8u4DbgZ+A3cA8SceAR4EZERFVqQszax631JjZcFhEqWlzANhDeUx8Z87bW0toAF6SdBD4hlK8rpPLmw98EKVi+hngS2B2bdvdEfEvpZzHVOAc8DewQdJS4MKg987MRiUnNWY2HAS8GBGzcpgWEVVLzV89C5W+OAuBuRExk1Jba9wg3vef2utLQNVv515K5eglwPZBbN/MRjEnNWY2FP4EJtXGdwDPS7oBQNJtWcW6rw7g94i4IGkGMKc272K1fh+7gOXZb2cKcD+XqdwraSKluODHwCrKZSszayD3qTGzodAFXMrLSO8B6ymXfvZnZ92zwCP9rLcdeC77vZygXIKqvAt0SdofEStq07cAcymVsQN4NSJOZ1LUn0nAR5LGUVqQXh7YLprZaOdbus3MzKwRfPnJzMzMGsFJjZmZmTWCkxozMzNrBCc1ZmZm1ghOaszMzKwRnNSYmZlZIzipMTMzs0b4DygiYL144MyWAAAAAElFTkSuQmCC\n","text/plain":["<Figure size 648x288 with 1 Axes>"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"7Z31mvHGQT3y"},"source":["## Residual bottleneck block\n","\n","A bottleneck block is often useful for better efficiency, especially when importing a model to mobile devices.\n","The residual bottleneck block is similar to the standard residual block, but the plain block part has a different architecture:\n","it consists of 3 convolutional layers, and the first two convolutional layers have a smaller number of channels.\n","\n","Here is the specification of the bottleneck block:\n","\n","1. Spatial Batch normalization\n","2. ReLU\n","3. Convolutional layer with `Cout // 4` 1x1 filters, stride 2 if downsampling; otherwise stride 1\n","4. Spatial Batch normalization\n","5. ReLU\n","6. Convolutional layer with `Cout // 4` 3x3 filters, with zero-padding of 1\n","7. Spatial Batch normalization\n","8. ReLU\n","9. 
class ResidualBottleneckBlock(nn.Module):
  """Pre-activation residual bottleneck block.

  Main path (self.block):
    BN -> ReLU -> 1x1 conv (Cin -> Cout // 4, stride 2 if downsampling)
    BN -> ReLU -> 3x3 conv (Cout // 4 -> Cout // 4, zero-padding 1)
    BN -> ReLU -> 1x1 conv (Cout // 4 -> Cout)

  Shortcut path (self.shortcut):
    - identity (an empty nn.Sequential) when Cin == Cout and there is no
      downsampling;
    - otherwise a 1x1 conv (Cin -> Cout) with the same stride as the main
      path, so both paths produce tensors of the same shape.

  Inputs:
  - Cin: number of input channels
  - Cout: number of output channels (the bottleneck width is Cout // 4)
  - downsample: add downsampling (a conv with stride=2) if True
  """

  def __init__(self, Cin, Cout, downsample=False):
    super().__init__()

    stride = 2 if downsample else 1
    mid = Cout // 4  # reduced channel count used inside the bottleneck

    self.block = nn.Sequential(
      nn.BatchNorm2d(Cin),
      nn.ReLU(),
      nn.Conv2d(Cin, mid, 1, padding=0, stride=stride),
      nn.BatchNorm2d(mid),
      nn.ReLU(),
      nn.Conv2d(mid, mid, 3, padding=1),
      nn.BatchNorm2d(mid),
      nn.ReLU(),
      nn.Conv2d(mid, Cout, 1, padding=0),
    )

    # The default shortcut must live on `self`. Assigning it to `nn.shortcut`
    # left self.shortcut as None for every Cin == Cout, stride-1 block, so
    # forward() crashed on those blocks.
    self.shortcut = nn.Sequential()
    if Cin != Cout or downsample:
      # Shapes differ between input and main-path output: project the input
      # with a 1x1 conv so the two can be added.
      self.shortcut = nn.Sequential(
        nn.Conv2d(Cin, Cout, 1, stride=stride, padding=0))

  def forward(self, x):
    """Return main-path output plus shortcut output for input x (N, Cin, H, W)."""
    return self.block(x) + self.shortcut(x)
# --- Cell: output-shape sanity check for ResidualBottleneckBlock -----------
# Each case: (downsample flag, expected output shape, success message,
# failure-message prefix). The input is always a (2, 3, 5, 6) zero tensor.
shape_cases = (
  (False, [2, 10, 5, 6],
   'The output of ResidualBlock without downsampling has a *correct* dimension!',
   'The output of ResidualBlock without downsampling has an *incorrect* dimension! expected:'),
  (True, [2, 10, 3, 3],
   'The output of ResidualBlock with downsampling has a *correct* dimension!',
   'The output of ResidualBlock with downsampling has an *incorrect* dimension! expected:'),
)
for downsample, expected_shape, ok_msg, bad_msg in shape_cases:
  data = torch.zeros(2, 3, 5, 6)
  model = ResidualBottleneckBlock(3, 10, downsample=downsample)
  if list(model(data).shape) == expected_shape:
    print(ok_msg)
  else:
    print(bad_msg, expected_shape, 'got:', list(model(data).shape))

# --- Cell: register and print the bottleneck ResNet-47 ---------------------
# Each stage_args entry appears to be (Cin, Cout, number of blocks,
# downsample), judging by the printed architecture — confirm against
# ResNetStage / get_resnet defined earlier in the notebook.
networks.update({
  'resnet47': {
    'block': ResidualBottleneckBlock,
    'stage_args': [
      (32, 32, 5, False),
      (32, 64, 5, True),
      (64, 128, 5, True),
    ],
  },
})

print(get_resnet('resnet47'))
momentum=0.1, affine=True, track_running_stats=True)\n","        (4): ReLU()\n","        (5): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","        (6): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","        (7): ReLU()\n","        (8): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))\n","      )\n","      (shortcut): Sequential(\n","        (0): Conv2d(3, 32, kernel_size=(1, 1), stride=(1, 1))\n","      )\n","    )\n","    (1): ResNetStage(\n","      (net): Sequential(\n","        (0): ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (1): ReLU()\n","            (2): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))\n","            (3): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","        )\n","        (1): ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (1): ReLU()\n","            (2): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))\n","            (3): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","        )\n","        (2): 
ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (1): ReLU()\n","            (2): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))\n","            (3): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","        )\n","        (3): ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (1): ReLU()\n","            (2): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))\n","            (3): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","        )\n","        (4): ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (1): ReLU()\n","            (2): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))\n","            (3): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","        )\n","      )\n","    )\n","    (2): ResNetStage(\n","      (net): Sequential(\n","        (0): ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (1): ReLU()\n","            (2): Conv2d(32, 16, kernel_size=(1, 1), stride=(2, 2))\n","            (3): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","          (shortcut): Sequential(\n","            (0): Conv2d(32, 64, kernel_size=(1, 1), stride=(2, 2))\n","          )\n","        )\n","        (1): ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (1): ReLU()\n","            (2): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))\n","            (3): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","        )\n","        (2): ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            
(1): ReLU()\n","            (2): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))\n","            (3): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","        )\n","        (3): ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (1): ReLU()\n","            (2): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))\n","            (3): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","        )\n","        (4): ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (1): ReLU()\n","            (2): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))\n","            (3): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","        )\n","      )\n","    
)\n","    (3): ResNetStage(\n","      (net): Sequential(\n","        (0): ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (1): ReLU()\n","            (2): Conv2d(64, 32, kernel_size=(1, 1), stride=(2, 2))\n","            (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","          (shortcut): Sequential(\n","            (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2))\n","          )\n","        )\n","        (1): ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (1): ReLU()\n","            (2): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))\n","            (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","        )\n","        (2): ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (1): ReLU()\n","            (2): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))\n","            (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","        )\n","        (3): ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (1): ReLU()\n","            (2): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))\n","            (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","        )\n","        (4): ResidualBottleneckBlock(\n","          (block): Sequential(\n","            (0): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (1): ReLU()\n","            (2): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))\n","            (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (4): ReLU()\n","            (5): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n","            (6): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n","            (7): ReLU()\n","            (8): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1))\n","          )\n","        )\n","      )\n","    )\n","  )\n","  (fc): Linear(in_features=128, out_features=10, bias=True)\n",")\n"],"name":"stdout"}]}]}